SlideShare a Scribd company logo
Full Text Search in
PostgreSQL
Aleksander Alekseev
Agenda
● Intro
● Full text search basics
● Fuzzy full text search
● And some other topics
Intro
Full Text Search in PostgreSQL
Well-known FTS Solutions
● ElasticSearch
● Solr
● Sphinx
Why Use FTS in PostgreSQL
● More or less as good as specialized software
● No data duplication
● Data is always consistent
● No need to install and maintain anything except PostgreSQL
Full Text Search Basics
to_tsvector
# SELECT to_tsvector('No need to install and maintain anything except PostgreSQL');
'anyth':7 'except':8 'instal':4 'maintain':6 'need':2 'postgresql':9
(1 row)
# SELECT to_tsvector('russian',
'Не нужно устанавливать и поддерживать ничего кроме PostgreSQL');
'postgresql':8 'кром':7 'нужн':2 'поддержива':5 'устанавлива':3
(1 row)
to_tsquery
# SELECT to_tsquery('install | maintain');
'instal' | 'maintain'
(1 row)
# SELECT to_tsquery('russian', 'устанавливать & поддерживать');
'устанавлива' & 'поддержива'
(1 row)
plainto_tsquery & phraseto_tsquery
# SELECT plainto_tsquery('install maintain');
'instal' & 'maintain'
(1 row)
# SELECT phraseto_tsquery('russian', 'устанавливать поддерживать');
'устанавлива' <-> 'поддержива'
(1 row)
tsvector @@ tsquery
# SELECT to_tsvector('No need to install and maintain anything except
PostgreSQL') @@ plainto_tsquery('install maintain') AS match;
match
-------
t
Indexes: GIN or GiST?
GIN vs GiST:
● GIN
○ fast search, not very fast updates
○ better for static data
● GiST
○ slow search, faster updates
○ better for dynamic data
If you are not sure use GIN.
Practice: 1 / 3
-- Table holding the documents to be searched.
-- Data source (presumably the English Wikipedia dump) and loader scripts:
--   https://meta.wikimedia.org/wiki/Data_dump_torrents#enwiki
--   https://github.com/afiskon/postgresql-fts-example
CREATE TABLE IF NOT EXISTS articles (
    id      serial PRIMARY KEY,
    title   varchar(128),
    content text
);

-- COPY ... FROM PROGRAM runs the command on the database server and reads
-- its standard output, so the gzipped dump is decompressed on the fly.
COPY articles FROM PROGRAM 'zcat /path/to/articles.copy.gz';
Practice: 2 / 3
-- Build the weighted search document for one article.
--
-- Parameters:
--   title   - article title, indexed with weight 'A'
--   content - article body, indexed with weight 'B'
-- Returns:
--   The concatenation of both weighted tsvectors, parsed with the 'english'
--   configuration. Never NULL: a NULL argument is treated as an empty string,
--   because to_tsvector(NULL) is NULL and NULL || tsvector is NULL, which
--   would otherwise make a row with a missing title or body unsearchable.
--
-- IMMUTABLE is valid because the text search configuration is given
-- explicitly rather than taken from default_text_search_config.
-- NOTE: if an expression index already uses this function, REINDEX it after
-- replacing the function body.
CREATE OR REPLACE FUNCTION make_tsvector(title text, content text)
RETURNS tsvector AS $$
BEGIN
    RETURN (setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(content, '')), 'B'));
END
$$ LANGUAGE plpgsql IMMUTABLE;
Practice: 3 / 3
-- Expression index over the weighted document produced by make_tsvector().
CREATE INDEX IF NOT EXISTS idx_fts_articles
    ON articles
    USING gin (make_tsvector(title, content));

-- Phrase search: "bjarne" immediately followed by "stroustrup".
-- The WHERE expression repeats the indexed expression so the planner can use
-- idx_fts_articles. The query is parsed with the same 'english' configuration
-- that make_tsvector() uses, instead of relying on default_text_search_config.
SELECT
    id,
    title
FROM articles
WHERE make_tsvector(title, content) @@ to_tsquery('english', 'bjarne <-> stroustrup');
2470 | Binary search algorithm
2129 | Bell Labs
2130 | Bjarne Stroustrup
3665 | C (programming language)
ts_headline: 1 / 2
-- Highlight the matched words in the title (default markers are <b>...</b>).
-- The tsquery is built once in FROM and aliased as q, so the same value is
-- reused by ts_headline() and by the WHERE filter.
-- The 'english' configuration is named explicitly so the query and the
-- headline are parsed the same way as the document in make_tsvector().
SELECT
    id,
    ts_headline('english', title, q)
FROM articles
CROSS JOIN to_tsquery('english', 'bjarne <-> stroustrup') AS q
WHERE make_tsvector(title, content) @@ q;
2470 | Binary search algorithm
2129 | Bell Labs
2130 | <b>Bjarne</b> <b>Stroustrup</b>
ts_headline: 2 / 2
-- Same headline query, with custom highlight markers passed through the
-- options string (StartSel / StopSel) instead of the default <b>...</b>.
-- The 'english' configuration is named explicitly so the query and the
-- headline are parsed the same way as the document in make_tsvector().
SELECT
    id,
    ts_headline('english', title, q, 'StartSel=<em>, StopSel=</em>')
FROM articles
CROSS JOIN to_tsquery('english', 'bjarne <-> stroustrup') AS q
WHERE make_tsvector(title, content) @@ q;
2470 | Binary search algorithm
2129 | Bell Labs
2130 | <em>Bjarne</em> <em>Stroustrup</em>
ts_rank
-- Return matching articles ordered by relevance, best match first.
-- ts_rank() scores the same weighted document that the WHERE clause filters on.
-- The 'english' configuration is named explicitly so the query and the
-- headline are parsed the same way as the document in make_tsvector().
SELECT
    id,
    ts_headline('english', title, q, 'StartSel=<em>, StopSel=</em>')
FROM articles
CROSS JOIN to_tsquery('english', 'bjarne <-> stroustrup') AS q
WHERE make_tsvector(title, content) @@ q
ORDER BY ts_rank(make_tsvector(title, content), q) DESC;
2130 | <em>Bjarne</em> <em>Stroustrup</em>
3665 | C (programming language)
6266 | Edsger W. Dijkstra
RUM
$ git clone git@github.com:postgrespro/rum.git
$ cd rum
$ USE_PGXS=1 make install
$ USE_PGXS=1 make installcheck
psql> CREATE EXTENSION rum;
Fuzzy Full Text Search
pg_trgm: 1 / 4
-- Enable trigram matching and build a GIN trigram index on article titles.
-- "if not exists" makes both statements safe to re-run, like the other DDL
-- in this walkthrough.
create extension if not exists pg_trgm;

create index if not exists articles_trgm_idx
    on articles
    using gin (title gin_trgm_ops);
pg_trgm: 2 / 4
-- Show the trigram sets pg_trgm extracts from three article titles.
select show_trgm(title)
from articles
limit 3;
show_trgm | {" a"," ac",acc,ble,cce,ces,com,eco,ess,ibl,ing,lec,mpu,...
show_trgm | {" a"," an",ana,arc,chi,his,ism,nar,rch,"sm "}
show_trgm | {" a"," af",afg,anh,ani,fgh,gha,han,his,ist,nhi,nis,ory,...
pg_trgm: 3 / 4
-- Fuzzy lookup of a misspelled name: the % operator keeps titles whose
-- trigram similarity to the search string exceeds the current threshold
-- (see show_limit()), and similarity() reports the actual score.
select
    title,
    similarity(title, 'Straustrup')
from articles
where title % 'Straustrup';
-[ RECORD 1 ]-----------------
title | Bjarne Stroustrup
similarity | 0.35
pg_trgm: 4 / 4
psql> select show_limit();
-[ RECORD 1 ]---
show_limit | 0.3
psql> select set_limit(0.4);
-[ RECORD 1 ]--
set_limit | 0.4
pg_trgm: like / ilike queries
# explain select title from articles where title LIKE '%Stroustrup%';
QUERY PLAN
---------------------------------------------------------------------------------
Bitmap Heap Scan on articles (cost=60.02..71.40 rows=3 width=16)
Recheck Cond: ((title)::text ~~ '%Stroustrup%'::text)
-> Bitmap Index Scan on articles_trgm_idx (cost=0.00..60.02 rows=3...
Index Cond: ((title)::text ~~ '%Stroustrup%'::text)
pg_trgm: regular expressions
# explain select title from articles where title ~* 'Stroustrup';
QUERY PLAN
---------------------------------------------------------------------------------
Bitmap Heap Scan on articles (cost=60.02..71.40 rows=3 width=16)
Recheck Cond: ((title)::text ~* 'Stroustrup'::text)
-> Bitmap Index Scan on articles_trgm_idx (cost=0.00..60.02 rows=3...
Index Cond: ((title)::text ~* 'Stroustrup'::text)
See also
● The pg_trgm module provides functions and operators for determining the
similarity of alphanumeric text based on trigram matching
○ https://www.postgresql.org/docs/current/static/pgtrgm.html
● Full Text Search support for JSON and JSONB
○ https://www.depesz.com/2017/04/04/waiting-for-postgresql-10-full-text-search-support-for-json-and-jsonb/
● RUM access method
○ https://github.com/postgrespro/rum
Thank you for your attention!
● http://eax.me/
● http://devzen.ru/
Bonus Slide!
GIN & arrays
-- GIN also indexes arrays: demo table with an int[] column of tags.
create table vec_test (
    id   serial primary key,
    tags int[]
);

create index vec_test_gin
    on vec_test
    using gin (tags);

insert into vec_test (tags)
values (array[111, 222, 333]);

-- {111} is contained in tags (every element of {111} appears in tags)
select id, tags from vec_test where array[111] <@ tags;

-- {111} contains tags (every element of tags appears in {111})
select id, tags from vec_test where array[111] @> tags;

-- tags is exactly {111}
select id, tags from vec_test where array[111] = tags;

-- intersection is not empty
select id, tags from vec_test where array[111] && tags;

More Related Content

PDF
Performance Tuning Oracle Weblogic Server 12c
PPTX
What you need to know for postgresql operation
PDF
[Pgday.Seoul 2021] 2. Porting Oracle UDF and Optimization
PDF
[pgday.Seoul 2022] PostgreSQL with Google Cloud
PDF
PostgreSQL Advanced Queries
PPTX
Introduction to PostgreSQL
PDF
[Pgday.Seoul 2020] SQL Tuning
PDF
Backup and-recovery2
Performance Tuning Oracle Weblogic Server 12c
What you need to know for postgresql operation
[Pgday.Seoul 2021] 2. Porting Oracle UDF and Optimization
[pgday.Seoul 2022] PostgreSQL with Google Cloud
PostgreSQL Advanced Queries
Introduction to PostgreSQL
[Pgday.Seoul 2020] SQL Tuning
Backup and-recovery2

What's hot (20)

PPTX
PostgreSQL Database Slides
PDF
Splunk Architecture | Splunk Tutorial For Beginners | Splunk Training | Splun...
PDF
MariaDB 마이그레이션 - 네오클로바
PDF
SQL injection: Not Only AND 1=1 (updated)
PDF
Koalas: Making an Easy Transition from Pandas to Apache Spark
PDF
PostgreSQL Internals (1) for PostgreSQL 9.6 (English)
PDF
How the Postgres Query Optimizer Works
 
PDF
Nikita Abdullin - Reverse-engineering of embedded MIPS devices. Case Study - ...
PDF
게임사를 위한 Amazon GameLift 세션 - 이정훈, AWS 솔루션즈 아키텍트
PDF
Top 10 Mistakes When Migrating From Oracle to PostgreSQL
PPTX
Running Airflow Workflows as ETL Processes on Hadoop
PDF
Migrating Oracle database to PostgreSQL
PDF
Mastering PostgreSQL Administration
 
ODP
Introduction to PostgreSQL
PDF
Full Text Search In PostgreSQL
PPTX
Debugging Android Native Library
PPTX
Time series Analytics - a deep dive into ADX Azure Data Explorer @Data Saturd...
PPTX
Postgre sql best_practices
PDF
Introduction to MongoDB
PDF
How does PostgreSQL work with disks: a DBA's checklist in detail. PGConf.US 2015
PostgreSQL Database Slides
Splunk Architecture | Splunk Tutorial For Beginners | Splunk Training | Splun...
MariaDB 마이그레이션 - 네오클로바
SQL injection: Not Only AND 1=1 (updated)
Koalas: Making an Easy Transition from Pandas to Apache Spark
PostgreSQL Internals (1) for PostgreSQL 9.6 (English)
How the Postgres Query Optimizer Works
 
Nikita Abdullin - Reverse-engineering of embedded MIPS devices. Case Study - ...
게임사를 위한 Amazon GameLift 세션 - 이정훈, AWS 솔루션즈 아키텍트
Top 10 Mistakes When Migrating From Oracle to PostgreSQL
Running Airflow Workflows as ETL Processes on Hadoop
Migrating Oracle database to PostgreSQL
Mastering PostgreSQL Administration
 
Introduction to PostgreSQL
Full Text Search In PostgreSQL
Debugging Android Native Library
Time series Analytics - a deep dive into ADX Azure Data Explorer @Data Saturd...
Postgre sql best_practices
Introduction to MongoDB
How does PostgreSQL work with disks: a DBA's checklist in detail. PGConf.US 2015
Ad

Similar to Full Text Search in PostgreSQL (20)

PPTX
PostgreSQL - It's kind've a nifty database
PDF
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
PDF
Better Full Text Search in PostgreSQL
PDF
Новые возможности полнотекстового поиска в PostgreSQL / Олег Бартунов (Postgr...
PDF
Postgresql search demystified
PDF
The State of (Full) Text Search in PostgreSQL 12
PDF
Pgbr 2013 fts
PDF
PDF
Rank Your Results with PostgreSQL Full Text Search (from PGConf2015)
PPTX
Postgres indexes: how to make them work for your application
PDF
Ten Reasons Why You Should Prefer PostgreSQL to MySQL
PDF
JDD 2016 - Tomasz Borek - DB for next project? Why, Postgres, of course
PPTX
Syntactic sugar in postgre sql
PPTX
Full Text search in Django with Postgres
PDF
PostgreSQL: Advanced indexing
PDF
PyCon Russian 2015 - Dive into full text search with python.
PDF
Syntactic sugar in Postgre SQL
PDF
Indexing Complex PostgreSQL Data Types
PDF
Postgres vs Elasticsearch while enriching data - Vlad Somov | Ruby Meditaiton...
PPTX
Postgres indexes
PostgreSQL - It's kind've a nifty database
Полнотекстовый поиск в PostgreSQL за миллисекунды (Олег Бартунов, Александр К...
Better Full Text Search in PostgreSQL
Новые возможности полнотекстового поиска в PostgreSQL / Олег Бартунов (Postgr...
Postgresql search demystified
The State of (Full) Text Search in PostgreSQL 12
Pgbr 2013 fts
Rank Your Results with PostgreSQL Full Text Search (from PGConf2015)
Postgres indexes: how to make them work for your application
Ten Reasons Why You Should Prefer PostgreSQL to MySQL
JDD 2016 - Tomasz Borek - DB for next project? Why, Postgres, of course
Syntactic sugar in postgre sql
Full Text search in Django with Postgres
PostgreSQL: Advanced indexing
PyCon Russian 2015 - Dive into full text search with python.
Syntactic sugar in Postgre SQL
Indexing Complex PostgreSQL Data Types
Postgres vs Elasticsearch while enriching data - Vlad Somov | Ruby Meditaiton...
Postgres indexes
Ad

More from Aleksander Alekseev (13)

PDF
Growing up new PostgreSQL developers (pgcon.org 2018)
PDF
PostgreSQL and Compressed Documents (pgconf.ru 2018)
PDF
PostgreSQL Sharding and HA: Theory and Practice (PGConf.ASIA 2017)
PDF
Data recovery using pg_filedump
PDF
pg_filedump
PDF
Quality Assurance in PostgreSQL
PDF
In-core compression: how to shrink your database size in several times
PDF
ZSON, или прозрачное сжатие JSON
PDF
Профилирование кода на C/C++ в *nix системах
PDF
Новые технологии репликации данных в PostgreSQL - Александр Алексеев
PDF
Haskell - это просто - Александр Алексеев
PDF
Работа с Akka Cluster - Александр Алексеев
PDF
Функциональное программирование - Александр Алексеев
Growing up new PostgreSQL developers (pgcon.org 2018)
PostgreSQL and Compressed Documents (pgconf.ru 2018)
PostgreSQL Sharding and HA: Theory and Practice (PGConf.ASIA 2017)
Data recovery using pg_filedump
pg_filedump
Quality Assurance in PostgreSQL
In-core compression: how to shrink your database size in several times
ZSON, или прозрачное сжатие JSON
Профилирование кода на C/C++ в *nix системах
Новые технологии репликации данных в PostgreSQL - Александр Алексеев
Haskell - это просто - Александр Алексеев
Работа с Akka Cluster - Александр Алексеев
Функциональное программирование - Александр Алексеев

Recently uploaded (20)

PPTX
A Presentation on Artificial Intelligence
PDF
Video forgery: An extensive analysis of inter-and intra-frame manipulation al...
PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PDF
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
PDF
Encapsulation theory and applications.pdf
PPTX
Programs and apps: productivity, graphics, security and other tools
PDF
Building Integrated photovoltaic BIPV_UPV.pdf
PDF
Approach and Philosophy of On baking technology
PDF
The Rise and Fall of 3GPP – Time for a Sabbatical?
PPTX
Digital-Transformation-Roadmap-for-Companies.pptx
PPT
Teaching material agriculture food technology
PPTX
Tartificialntelligence_presentation.pptx
PPTX
SOPHOS-XG Firewall Administrator PPT.pptx
PDF
Empathic Computing: Creating Shared Understanding
PDF
MIND Revenue Release Quarter 2 2025 Press Release
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
Getting Started with Data Integration: FME Form 101
PDF
Dropbox Q2 2025 Financial Results & Investor Presentation
PDF
Diabetes mellitus diagnosis method based random forest with bat algorithm
PPTX
1. Introduction to Computer Programming.pptx
A Presentation on Artificial Intelligence
Video forgery: An extensive analysis of inter-and intra-frame manipulation al...
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
Encapsulation theory and applications.pdf
Programs and apps: productivity, graphics, security and other tools
Building Integrated photovoltaic BIPV_UPV.pdf
Approach and Philosophy of On baking technology
The Rise and Fall of 3GPP – Time for a Sabbatical?
Digital-Transformation-Roadmap-for-Companies.pptx
Teaching material agriculture food technology
Tartificialntelligence_presentation.pptx
SOPHOS-XG Firewall Administrator PPT.pptx
Empathic Computing: Creating Shared Understanding
MIND Revenue Release Quarter 2 2025 Press Release
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
Getting Started with Data Integration: FME Form 101
Dropbox Q2 2025 Financial Results & Investor Presentation
Diabetes mellitus diagnosis method based random forest with bat algorithm
1. Introduction to Computer Programming.pptx

Full Text Search in PostgreSQL

  • 1. Full Text Search in PostgreSQL Aleksander Alekseev
  • 2. Agenda ● Intro ● Full text search basics ● Fuzzy full text search ● And some other topics
  • 5. Well-known FTS Solutions ● ElasticSearch ● Solr ● Sphinx
  • 6. Why Use FTS in PostgreSQL ● More or less as good as specialized software ● No data duplication ● Data is always consistent ● No need to install and maintain anything except PostgreSQL
  • 8. to_tsvector # SELECT to_tsvector('No need to install and maintain anything except PostgreSQL'); 'anyth':7 'except':8 'instal':4 'maintain':6 'need':2 'postgresql':9 (1 row) # SELECT to_tsvector('russian', 'Не нужно устанавливать и поддерживать ничего кроме PostgreSQL'); 'postgresql':8 'кром':7 'нужн':2 'поддержива':5 'устанавлива':3 (1 row)
  • 9. to_tsquery # SELECT to_tsquery('install | maintain'); 'instal' | 'maintain' (1 row) # SELECT to_tsquery('russian', 'устанавливать & поддерживать'); 'устанавлива' & 'поддержива' (1 row)
  • 10. plainto_tsquery & phraseto_tsquery # SELECT plainto_tsquery('install maintain'); 'instal' & 'maintain' (1 row) # SELECT phraseto_tsquery('russian', 'устанавливать поддерживать'); 'устанавлива' <-> 'поддержива' (1 row)
  • 11. tsvector @@ tsquery # SELECT to_tsvector('No need to install and maintain anything except PostgreSQL') @@ plainto_tsquery('install maintain') AS match; match ------- t
  • 12. Indexes: GIN or GiST? GIN vs GiST: ● GIN ○ fast search, not very fast updates ○ better for static data ● GiST ○ slow search, faster updates ○ better for dynamic data If you are not sure use GIN.
  • 13. Practice: 1 / 3 CREATE TABLE IF NOT EXISTS articles(id serial primary key, title varchar(128), content text); -- https://meta.wikimedia.org/wiki/Data_dump_torrents#enwiki -- https://github.com/afiskon/postgresql-fts-example COPY articles FROM PROGRAM 'zcat /path/to/articles.copy.gz';
  • 14. Practice: 2 / 3 CREATE OR REPLACE FUNCTION make_tsvector(title text, content text) RETURNS tsvector AS $$ BEGIN RETURN (setweight(to_tsvector('english', title),'A') || setweight(to_tsvector('english', content), 'B')); END $$ LANGUAGE 'plpgsql' IMMUTABLE;
  • 15. Practice: 3 / 3 CREATE INDEX IF NOT EXISTS idx_fts_articles ON articles USING gin(make_tsvector(title, content)); SELECT id, title FROM articles WHERE make_tsvector(title, content) @@ to_tsquery('bjarne <-> stroustrup'); 2470 | Binary search algorithm 2129 | Bell Labs 2130 | Bjarne Stroustrup 3665 | C (programming language)
  • 16. ts_headline: 1 / 2 SELECT id, ts_headline(title, q) FROM articles, to_tsquery('bjarne <-> stroustrup') AS q -- !!! WHERE make_tsvector(title, content) @@ q; 2470 | Binary search algorithm 2129 | Bell Labs 2130 | <b>Bjarne</b> <b>Stroustrup</b>
  • 17. ts_headline: 2 / 2 SELECT id, ts_headline(title, q, 'StartSel=<em>, StopSel=</em>') -- !!! FROM articles, to_tsquery('bjarne <-> stroustrup') as q WHERE make_tsvector(title, content) @@ q; 2470 | Binary search algorithm 2129 | Bell Labs 2130 | <em>Bjarne</em> <em>Stroustrup</em>
  • 18. ts_rank SELECT id, ts_headline(title, q, 'StartSel=<em>, StopSel=</em>') FROM articles, to_tsquery('bjarne <-> stroustrup') as q WHERE make_tsvector(title, content) @@ q ORDER BY ts_rank(make_tsvector(title, content), q) DESC; 2130 | <em>Bjarne</em> <em>Stroustrup</em> 3665 | C (programming language) 6266 | Edsger W. Dijkstra
  • 19. RUM $ git clone git@github.com:postgrespro/rum.git $ cd rum $ USE_PGXS=1 make install $ USE_PGXS=1 make installcheck psql> CREATE EXTENSION rum;
  • 20. Fuzzy Full Text Search
  • 21. pg_trgm: 1 / 4 create extension pg_trgm; create index articles_trgm_idx on articles using gin (title gin_trgm_ops);
  • 22. pg_trgm: 2 / 4 select show_trgm(title) from articles limit 3; show_trgm | {" a"," ac",acc,ble,cce,ces,com,eco,ess,ibl,ing,lec,mpu,... show_trgm | {" a"," an",ana,arc,chi,his,ism,nar,rch,"sm "} show_trgm | {" a"," af",afg,anh,ani,fgh,gha,han,his,ist,nhi,nis,ory,...
  • 23. pg_trgm: 3 / 4 select title, similarity(title, 'Straustrup') from articles where title % 'Straustrup'; -[ RECORD 1 ]----------------- title | Bjarne Stroustrup similarity | 0.35
  • 24. pg_trgm: 4 / 4 psql> select show_limit(); -[ RECORD 1 ]--- show_limit | 0.3 psql> select set_limit(0.4); -[ RECORD 1 ]-- set_limit | 0.4
  • 25. pg_trgm: like / ilike queries # explain select title from articles where title LIKE '%Stroustrup%'; QUERY PLAN --------------------------------------------------------------------------------- Bitmap Heap Scan on articles (cost=60.02..71.40 rows=3 width=16) Recheck Cond: ((title)::text ~~ '%Stroustrup%'::text) -> Bitmap Index Scan on articles_trgm_idx (cost=0.00..60.02 rows=3... Index Cond: ((title)::text ~~ '%Stroustrup%'::text)
  • 26. pg_trgm: regular expressions # explain select title from articles where title ~* 'Stroustrup'; QUERY PLAN --------------------------------------------------------------------------------- Bitmap Heap Scan on articles (cost=60.02..71.40 rows=3 width=16) Recheck Cond: ((title)::text ~* 'Stroustrup'::text) -> Bitmap Index Scan on articles_trgm_idx (cost=0.00..60.02 rows=3... Index Cond: ((title)::text ~* 'Stroustrup'::text)
  • 27. See also ● The pg_trgm module provides functions and operators for determining the similarity of alphanumeric text based on trigram matching ○ https://www.postgresql.org/docs/current/static/pgtrgm.html ● Full Text Search support for JSON and JSONB ○ https://www.depesz.com/2017/04/04/waiting-for-postgresql-10-full-text-search-support-for-json-and-jsonb/ ● RUM access method ○ https://github.com/postgrespro/rum
  • 28. Thank you for your attention! ● http://eax.me/ ● http://devzen.ru/
  • 30. GIN & arrays create table vec_test(id serial primary key, tags int[]); create index vec_test_gin on vec_test using gin(tags); insert into vec_test (tags) values ('{111,222,333}'); select * from vec_test where '{111}' <@ tags; select * from vec_test where '{111}' @> tags; select * from vec_test where '{111}' = tags; -- intersection is not empty select * from vec_test where '{111}' && tags;