From 397a9b96670097df72f95af04687b1874fb6ae31 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tv@fuzzy.cz>
Date: Sun, 11 Jan 2015 20:18:24 +0100
Subject: [PATCH 4/4] multivariate histograms

- extends the pg_mv_statistic catalog (add 'hist' fields)
- building the histograms during ANALYZE
- simple estimation while planning the queries

FIX: don't build histogram by default
FIX: analyze histogram only when requested
FIX: improvements in clauselist_mv_selectivity_histogram()
FIX: minor cleanup in multivariate histograms

- move BUCKET_SIZE_SERIALIZED() macro to histogram.c
- rename MVHIST_ constants to MVSTAT_HIST

FIX: comment about building histograms on too many columns
FIX: added comment about check in ALTER TABLE ... ADD STATISTICS
FIX: added comment about handling DROP TABLE / DROP COLUMN
FIX: added comment about ALTER TABLE ... DROP STATISTICS
FIX: comment about building NULL-buckets for a histogram
FIX: initial support for all data types and NULL in MV histograms

This changes serialize_histogram/update_mv_stats in a somewhat
strange way (passing VacAttrStats all over the place). This
needs to be improved, somehow, before rebasing into the
histogram part. Otherwise it'll cause needless conflicts.

FIX: refactoring lookup_var_attr_stats() / histograms
FIX: a set of regression tests for MV histograms

This is mostly equal to a combination of all the regression tests
for functional dependencies / MCV lists.

The last test fails due to Assert(!isNull) in partition_bucket()
which prevents NULL values in histograms.

FIX: remove the two memcmp-based comparators (used for histograms)
FIX: comment about memory corruption in deserializing histograms
FIX: remove CompareScalarsContext/ScalarMCVItem from common.h
FIX: fix the lookup_vac_attr() refactoring in histograms
FIX: get rid of the custom comparators in histogram.c
FIX: building NULL-buckets - buckets with just NULLs in some dimension(s)
FIX: fixed bugs in serialize/deserialize methods for histogram

When serializing, BUCKET_MIN_INDEXES were set twice (once instead
of BUCKET_MAX_INDEXES, which were not set at all).

When deserializing, the 'tmp' pointer was not advanced, so just
the first bucket was ever deserialized (and copied into all the
histogram buckets).

Added a few asserts into deserialize method, similarly to how
it's done in serialize.

FIX: formatting issues in the histogram regression test
FIX: remove sample-dependent results from histogram regression test
FIX: add USE_ASSERT_CHECKING to assert-only variable (histogram)
FIX: check ADD STATISTICS options (histograms)
FIX: improved comments/docs for the multivariate histograms
FIX: reuse DimensionInfo (after move to common.h)
FIX: remove obsolete TODO about NULL-buckets, improve comments
FIX: move multivariate histogram definitions after MCV lists
FIX: correct variable names in error message (dimension index 'j')
FIX: add support for 'IS [NOT] NULL' support to histograms
FIX: add regression test for ADD STATISTICS options (histograms)
FIX: added regression test to test IS [NOT] NULL with histograms
FIX: make regression tests parallel-happy (histograms)
---
 src/backend/catalog/system_views.sql       |    4 +-
 src/backend/commands/tablecmds.c           |   55 +-
 src/backend/optimizer/path/clausesel.c     |  391 +++++-
 src/backend/utils/mvstats/Makefile         |    2 +-
 src/backend/utils/mvstats/common.c         |   67 +-
 src/backend/utils/mvstats/common.h         |   14 -
 src/backend/utils/mvstats/histogram.c      | 1778 ++++++++++++++++++++++++++++
 src/backend/utils/mvstats/mcv.c            |    1 +
 src/include/catalog/pg_mv_statistic.h      |   24 +-
 src/include/catalog/pg_proc.h              |    2 +
 src/include/utils/mvstats.h                |   99 +-
 src/test/regress/expected/mv_histogram.out |  210 ++++
 src/test/regress/expected/rules.out        |    4 +-
 src/test/regress/parallel_schedule         |    2 +-
 src/test/regress/serial_schedule           |    1 +
 src/test/regress/sql/mv_histogram.sql      |  179 +++
 16 files changed, 2776 insertions(+), 57 deletions(-)
 create mode 100644 src/backend/utils/mvstats/histogram.c
 create mode 100644 src/test/regress/expected/mv_histogram.out
 create mode 100644 src/test/regress/sql/mv_histogram.sql

diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 8acf160..3aa7d2b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -160,7 +160,9 @@ CREATE VIEW pg_mv_stats AS
         length(S.stadeps) as depsbytes,
         pg_mv_stats_dependencies_info(S.stadeps) as depsinfo,
         length(S.stamcv) AS mcvbytes,
-        pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo
+        pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo,
+        length(S.stahist) AS histbytes,
+        pg_mv_stats_histogram_info(S.stahist) AS histinfo
     FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid))
         LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1f08c1c..2bd3884 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -11635,6 +11635,16 @@ static int compare_int16(const void *a, const void *b)
  *       multiple stats on the same columns with different options
  *       (say, a detailed MCV-only stats for some queries, histogram
  *       for others, etc.)
+ *
+ * FIXME Check that at least one of the statistic types is enabled, and
+ *       that only compatible options are used. For example if 'mcv' is
+ *       not selected, then 'mcv_max_items' can't be used (alternative
+ *       might be to enable it automatically).
+ *
+ * TODO It might be useful to have ALTER TABLE DROP STATISTICS too, but
+ *      it's tricky because there may be multiple kinds of stats for the
+ *      same list of columns, with different options (e.g. one just MCV
+ *      list, another with histogram, etc.).
  */
 static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 						StatisticsDef *def, LOCKMODE lockmode)
@@ -11652,12 +11662,15 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 
 	/* by default build everything */
 	bool 	build_dependencies = false,
-			build_mcv = false;
+			build_mcv = false,
+			build_histogram = false;
 
-	int32 	max_mcv_items = -1;
+	int32 	max_buckets = -1,
+			max_mcv_items = -1;
 
 	/* options required because of other options */
-	bool	require_mcv = false;
+	bool	require_mcv = false,
+			require_histogram = false;
 
 	Assert(IsA(def, StatisticsDef));
 
@@ -11735,6 +11748,29 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 								MVSTAT_MCVLIST_MAX_ITEMS)));
 
 		}
+		else if (strcmp(opt->defname, "histogram") == 0)
+			build_histogram = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "max_buckets") == 0)
+		{
+			max_buckets = defGetInt32(opt);
+
+			/* this option requires 'histogram' to be enabled */
+			require_histogram = true;
+
+			/* sanity check */
+			if (max_buckets < MVSTAT_HIST_MIN_BUCKETS)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("minimum number of buckets is %d",
+								MVSTAT_HIST_MIN_BUCKETS)));
+
+			else if (max_buckets > MVSTAT_HIST_MAX_BUCKETS)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("maximum number of buckets is %d",
+								MVSTAT_HIST_MAX_BUCKETS)));
+
+		}
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -11743,10 +11779,10 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 	}
 
 	/* check that at least some statistics were requested */
-	if (! (build_dependencies || build_mcv))
+	if (! (build_dependencies || build_mcv || build_histogram))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("no statistics type (dependencies, mcv) was requested")));
+				 errmsg("no statistics type (dependencies, mcv, histogram) was requested")));
 
 	/* now do some checking of the options */
 	if (require_mcv && (! build_mcv))
@@ -11754,6 +11790,11 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("option 'mcv' is required by other options(s)")));
 
+	if (require_histogram && (! build_histogram))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("option 'histogram' is required by other option(s)")));
+
 	/* sort the attnums and build int2vector */
 	qsort(attnums, numcols, sizeof(int16), compare_int16);
 	stakeys = buildint2vector(attnums, numcols);
@@ -11771,10 +11812,14 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 
 	values[Anum_pg_mv_statistic_deps_enabled -1] = BoolGetDatum(build_dependencies);
 	values[Anum_pg_mv_statistic_mcv_enabled   -1] = BoolGetDatum(build_mcv);
+	values[Anum_pg_mv_statistic_hist_enabled  -1] = BoolGetDatum(build_histogram);
+
 	values[Anum_pg_mv_statistic_mcv_max_items    -1] = Int32GetDatum(max_mcv_items);
+	values[Anum_pg_mv_statistic_hist_max_buckets -1] = Int32GetDatum(max_buckets);
 
 	nulls[Anum_pg_mv_statistic_stadeps -1]  = true;
 	nulls[Anum_pg_mv_statistic_stamcv   -1] = true;
+	nulls[Anum_pg_mv_statistic_stahist  -1] = true;
 
 	/* insert the tuple into pg_mv_statistic */
 	mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock);
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 1446fa0..a4e6d16 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -70,6 +70,8 @@ static Selectivity clauselist_mv_selectivity(PlannerInfo *root,
 static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root,
 									List *clauses, MVStats mvstats,
 									bool *fullmatch, Selectivity *lowsel);
+static Selectivity clauselist_mv_selectivity_histogram(PlannerInfo *root,
+									List *clauses, MVStats mvstats);
 
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
@@ -1119,6 +1121,7 @@ static Selectivity
 clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
 {
 	bool fullmatch = false;
+	Selectivity s1 = 0.0, s2 = 0.0;
 
 	/*
 	 * Lowest frequency in the MCV list (may be used as an upper bound
@@ -1132,9 +1135,24 @@ clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
 	 *      MCV/histogram evaluation). 
 	 */
 
-	/* Evaluate the MCV selectivity */
-	return clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
+	/* Evaluate the MCV first. */
+	s1 = clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
 										   &fullmatch, &mcv_low);
+
+	/*
+	 * If we got a full equality match on the MCV list, we're done (and
+	 * the estimate is pretty good).
+	 */
+	if (fullmatch && (s1 > 0.0))
+		return s1;
+
+	/* FIXME if (fullmatch) without matching MCV item, use the mcv_low
+	 *       selectivity as upper bound */
+
+	s2 = clauselist_mv_selectivity_histogram(root, clauses, mvstats);
+
+	/* TODO clamp to <= 1.0 (or more strictly, when possible) */
+	return s1 + s2;
 }
 
 /*
@@ -2024,3 +2042,372 @@ clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
 
 	return s;
 }
+
+/*
+ * Estimate selectivity of clauses using a histogram.
+ *
+ * If there's no histogram for the stats, the function returns 0.0.
+ *
+ * The general idea of this method is similar to how MCV lists are
+ * processed, except that this introduces the concept of a partial
+ * match (MCV only works with full match / mismatch).
+ *
+ * The algorithm works like this:
+ *
+ *   1) mark all buckets as 'full match'
+ *   2) walk through all the clauses
+ *   3) for a particular clause, walk through all the buckets
+ *   4) skip buckets that are already 'no match'
+ *   5) check clause for buckets that still match (at least partially)
+ *   6) sum frequencies for buckets to get selectivity
+ *
+ * Unlike MCV lists, histograms have a concept of a partial match.
+ *
+ * TODO This only handles AND-ed clauses, but it might work for OR-ed
+ *      lists too - it just needs to reverse the logic a bit. I.e. start
+ *      with 'no match' for all buckets, and increase the match level
+ *      for the clauses (and skip buckets that are 'full match').
+ *
+ * TODO This might use a similar shortcut to MCV lists - count buckets
+ *      marked as partial/full match, and terminate once this drops to 0.
+ *      Not sure if it's really worth it - for MCV lists a situation like
+ *      this is not uncommon, but for histograms it's not that clear.
+ */
+static Selectivity
+clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses,
+									MVStats mvstats)
+{
+	int i;
+	Selectivity s = 0.0;
+	ListCell * l;
+	char *matches = NULL;
+	MVHistogram mvhist = NULL;
+
+	/* there's no histogram */
+	if (! mvstats->hist_built)
+		return 0.0;
+
+	/* There may be no histogram in the stats (check hist_built flag) */
+	mvhist = deserialize_mv_histogram(fetch_mv_histogram(mvstats->mvoid));
+
+	Assert (mvhist != NULL);
+	Assert (clauses != NIL);
+	Assert (list_length(clauses) >= 2);
+
+	/*
+	 * Bitmap of bucket matches (mismatch, partial, full). By default
+	 * all buckets fully match (and we'll eliminate them).
+	 */
+	matches = palloc0(sizeof(char) * mvhist->nbuckets);
+	memset(matches,  MVSTATS_MATCH_FULL, sizeof(char)*mvhist->nbuckets);
+
+	/* loop through the clauses and do the estimation */
+	foreach (l, clauses)
+	{
+		Node * clause = (Node*)lfirst(l);
+
+		/* it's either OpClause, or NullTest */
+		if (is_opclause(clause))
+		{
+			OpExpr * expr = (OpExpr*)clause;
+			bool		varonleft = true;
+			bool		ok;
+
+			FmgrInfo	opproc;			/* operator */
+			fmgr_info(get_opcode(expr->opno), &opproc);
+
+			ok = (NumRelids(clause) == 1) &&
+				 (is_pseudo_constant_clause(lsecond(expr->args)) ||
+				 (varonleft = false,
+				  is_pseudo_constant_clause(linitial(expr->args))));
+
+			if (ok)
+			{
+				FmgrInfo	ltproc;
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+				Const * cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args);
+				bool isgt = (! varonleft);
+
+				/*
+				 * TODO Fetch only when really needed (probably for equality only)
+				 *
+				 * TODO Technically either lt/gt is sufficient.
+				 * 
+				 * FIXME The code in analyze.c creates histograms only for types
+				 *       with enough ordering (by calling get_sort_group_operators).
+				 *       Is this the same assumption, i.e. are we certain that we
+				 *       get the ltproc/gtproc every time we ask? Or are there types
+				 *       where get_sort_group_operators returns ltopr and here we
+				 *       get nothing?
+				 */
+				TypeCacheEntry *typecache
+					= lookup_type_cache(var->vartype, TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR
+																	   | TYPECACHE_GT_OPR);
+
+				/* lookup dimension for the attribute */
+				int idx = mv_get_index(var->varattno, mvstats->stakeys);
+
+				fmgr_info(get_opcode(typecache->lt_opr), &ltproc);
+
+				/*
+				 * Check this for all buckets that still have "true" in the bitmap
+				 * 
+				 * We already know the clauses use suitable operators (because that's
+				 * how we filtered them).
+				 */
+				for (i = 0; i < mvhist->nbuckets; i++)
+				{
+					bool tmp;
+					MVBucket bucket = mvhist->buckets[i];
+
+					/*
+					 * Skip buckets that were already eliminated - this is important
+					 * considering how we update the info (we only lower the match)
+					 */
+					if (matches[i] == MVSTATS_MATCH_NONE)
+						continue;
+
+					/*
+					* If it's not a "<" or ">" or "=" operator, just ignore the
+					* clause. Otherwise note the relid and attnum for the variable.
+					*
+					* TODO I'm really unsure the handling of 'isgt' flag (that is, clauses
+					*      with reverse order of variable/constant) is correct. I wouldn't
+					*      be surprised if there was some mixup. Using the lt/gt operators
+					*      instead of messing with the opproc could make it simpler.
+					*      It would however be using a different operator than the query,
+					*      although it's not any shadier than using the selectivity function
+					*      as is done currently.
+					*
+					* FIXME Once the min/max values are deduplicated, we can easily minimize
+					*       the number of calls to the comparator (assuming we keep the
+					*       deduplicated structure). See the note on compression at MVBucket
+					*       serialize/deserialize methods.
+					*/
+					switch (get_oprrest(expr->opno))
+					{
+						case F_SCALARLTSEL:	/* column < constant */
+
+							if (! isgt)	/* (var < const) */
+							{
+								/*
+								 * First check whether the constant is below the lower boundary (in that 
+								 * case we can skip the bucket, because there's no overlap).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 cst->constvalue,
+																	 bucket->min[idx]));
+								if (tmp)
+								{
+									matches[i] = MVSTATS_MATCH_NONE; /* no match */
+									continue;
+								}
+
+								/*
+								 * Now check whether the upper boundary is below the constant (in that
+								 * case it's a partial match).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 cst->constvalue,
+																	 bucket->max[idx]));
+
+								if (tmp)
+									matches[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+							}
+							else	/* (const < var) */
+							{
+								/*
+								 * First check whether the constant is above the upper boundary (in that 
+								 * case we can skip the bucket, because there's no overlap).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 bucket->max[idx],
+																	 cst->constvalue));
+								if (tmp)
+								{
+									matches[i] = MVSTATS_MATCH_NONE; /* no match */
+									continue;
+								}
+
+								/*
+								 * Now check whether the lower boundary is below the constant (in that
+								 * case it's a partial match).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 bucket->min[idx],
+																	 cst->constvalue));
+
+								if (tmp)
+									matches[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+							}
+							break;
+
+						case F_SCALARGTSEL:	/* column > constant */
+
+							if (! isgt)	/* (var > const) */
+							{
+								/*
+								 * First check whether the constant is above the upper boundary (in that 
+								 * case we can skip the bucket, because there's no overlap).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 cst->constvalue,
+																	 bucket->max[idx]));
+								if (tmp)
+								{
+									matches[i] = MVSTATS_MATCH_NONE; /* no match */
+									continue;
+								}
+
+								/*
+								 * Now check whether the lower boundary is below the constant (in that
+								 * case it's a partial match).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 cst->constvalue,
+																	 bucket->min[idx]));
+
+								if (tmp)
+									matches[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+							}
+							else /* (const > var) */
+							{
+								/*
+								 * First check whether the constant is below the lower boundary (in
+								 * that case we can skip the bucket, because there's no overlap).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 bucket->min[idx],
+																	 cst->constvalue));
+								if (tmp)
+								{
+									matches[i] = MVSTATS_MATCH_NONE; /* no match */
+									continue;
+								}
+
+								/*
+								 * Now check whether the upper boundary is below the constant (in that
+								 * case it's a partial match).
+								 */
+								tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																	 DEFAULT_COLLATION_OID,
+																	 bucket->max[idx],
+																	 cst->constvalue));
+
+								if (tmp)
+									matches[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+							}
+
+							break;
+
+						case F_EQSEL:
+
+							/*
+							 * We only check whether the value is within the bucket, using the lt/gt
+							 * operators fetched from type cache.
+							 * 
+							 * TODO We'll use the default 50% estimate, but that's probably way off
+							 *		if there are multiple distinct values. Consider tweaking this
+							 *		somehow, e.g. using only a part inversely proportional to the
+							 *		estimated number of distinct values in the bucket.
+							 *
+							 * TODO This does not handle inclusion flags at the moment, thus counting
+							 *		some buckets twice (when hitting the boundary).
+							 * 
+							 * TODO Optimization is that if max[i] == min[i], it's effectively a MCV
+							 *		item and we can count the whole bucket as a complete match (thus
+							 *		using 100% bucket selectivity and not just 50%).
+							 * 
+							 * TODO Technically some buckets may "degenerate" into single-value
+							 *		buckets (not necessarily for all the dimensions) - maybe this
+							 *		is better than keeping a separate MCV list (multi-dimensional).
+							 *		Update: Actually, that's unlikely to be better than a separate
+							 *		MCV list for two reasons - first, it requires ~2x the space
+							 *		(because of storing lower/upper boundaries) and second because
+							 *		the buckets are ranges - depending on the partitioning algorithm
+							 *		it may not even degenerate into (min=max) bucket. For example the
+							 *		the current partitioning algorithm never does that.
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&ltproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 bucket->min[idx]));
+
+							if (tmp)
+							{
+								matches[i] = MVSTATS_MATCH_NONE;	/* constvalue < min */
+								continue;
+							}
+
+							tmp = DatumGetBool(FunctionCall2Coll(&ltproc,
+																 DEFAULT_COLLATION_OID,
+																 bucket->max[idx],
+																 cst->constvalue));
+
+							if (tmp)
+							{
+								matches[i] = MVSTATS_MATCH_NONE;	/* constvalue > max */
+								continue;
+							}
+
+							/* partial match */
+							matches[i] = MVSTATS_MATCH_PARTIAL;
+
+							break;
+					}
+				}
+			}
+		}
+		else if (IsA(clause, NullTest))
+		{
+			NullTest * expr = (NullTest*)clause;
+			Var * var = (Var*)(expr->arg);
+
+			/* FIXME proper matching attribute to dimension */
+			int idx = mv_get_index(var->varattno, mvstats->stakeys);
+
+			/*
+			 * Walk through the buckets and evaluate the current clause. We can
+			 * skip items that were already ruled out, and terminate if there are
+			 * no remaining buckets that might possibly match.
+			 */
+			for (i = 0; i < mvhist->nbuckets; i++)
+			{
+				MVBucket bucket = mvhist->buckets[i];
+
+				/*
+				 * Skip buckets that were already eliminated - this is important
+				 * considering how we update the info (we only lower the match)
+				 */
+				if (matches[i] == MVSTATS_MATCH_NONE)
+					continue;
+
+				/* if the clause mismatches the MCV item, set it as MATCH_NONE */
+				if ((expr->nulltesttype == IS_NULL)
+					&& (! bucket->nullsonly[idx]))
+						matches[i] = MVSTATS_MATCH_NONE;
+				else if ((expr->nulltesttype == IS_NOT_NULL) &&
+						 (bucket->nullsonly[idx]))
+						matches[i] = MVSTATS_MATCH_NONE;
+			}
+		}
+	}
+
+	/* now, walk through the buckets and sum the selectivities */
+	for (i = 0; i < mvhist->nbuckets; i++)
+	{
+		if (matches[i] == MVSTATS_MATCH_FULL)
+			s += mvhist->buckets[i]->ntuples;
+		else if (matches[i] == MVSTATS_MATCH_PARTIAL)
+			s += 0.5 * mvhist->buckets[i]->ntuples;
+	}
+
+	return s;
+}
diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile
index 3c0aff4..9dbb3b6 100644
--- a/src/backend/utils/mvstats/Makefile
+++ b/src/backend/utils/mvstats/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mvstats
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = common.o mcv.o dependencies.o
+OBJS = common.o dependencies.o histogram.o mcv.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
index 69ab805..f6edb2f 100644
--- a/src/backend/utils/mvstats/common.c
+++ b/src/backend/utils/mvstats/common.c
@@ -45,7 +45,8 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 	{
 		MVDependencies	deps  = NULL;
 		MCVList		mcvlist   = NULL;
-		int numrows_filtered  = 0;
+		MVHistogram	histogram = NULL;
+		int numrows_filtered  = numrows;
 
 		/* int2 vector of attnums the stats should be computed on */
 		int2vector * attrs = mvstats[i].stakeys;
@@ -66,8 +67,23 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 		if (mvstats->mcv_enabled)
 			mcvlist = build_mv_mcvlist(numrows, rows, attrs, stats, &numrows_filtered);
 
+		/*
+		 * Build a multivariate histogram on the columns.
+		 *
+		 * FIXME remove the rows used to build the MCV from the histogram.
+		 *       Another option might be subtracting the MCV selectivities
+		 *       from the histogram, but I'm not sure whether that works
+		 *       accurately (maybe it introduces additional errors).
+		 */
+		if ((numrows_filtered > 0) && (mvstats->hist_enabled))
+			histogram = build_mv_histogram(numrows_filtered, rows, attrs, stats, numrows);
+
 		/* store the histogram / MCV list in the catalog */
-		update_mv_stats(mvstats[i].mvoid, deps, mcvlist, attrs, stats);
+		update_mv_stats(mvstats[i].mvoid, deps, mcvlist, histogram, attrs, stats);
+
+#ifdef MVSTATS_DEBUG
+		print_mv_histogram_info(histogram);
+#endif
 	}
 }
 
@@ -149,7 +165,7 @@ list_mv_stats(Oid relid, int *nstats, bool built_only)
 		 * Skip statistics that were not computed yet (if only stats
 		 * that were already built were requested)
 		 */
-		if (built_only && (! (stats->mcv_built || stats->deps_built)))
+		if (built_only && (! (stats->mcv_built || stats->deps_built || stats->hist_built)))
 			continue;
 
 		/* double the array size if needed */
@@ -161,10 +177,15 @@ list_mv_stats(Oid relid, int *nstats, bool built_only)
 
 		result[*nstats].mvoid = HeapTupleGetOid(htup);
 		result[*nstats].stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
+
 		result[*nstats].deps_enabled = stats->deps_enabled;
 		result[*nstats].mcv_enabled = stats->mcv_enabled;
+		result[*nstats].hist_enabled = stats->hist_enabled;
+
 		result[*nstats].deps_built = stats->deps_built;
 		result[*nstats].mcv_built = stats->mcv_built;
+		result[*nstats].hist_built = stats->hist_built;
+
 		*nstats += 1;
 	}
 
@@ -178,9 +199,16 @@ list_mv_stats(Oid relid, int *nstats, bool built_only)
 	return result;
 }
 
+/*
+ * FIXME This adds statistics, but we need to drop statistics when the
+ *       table is dropped. Not sure what to do when a column is dropped.
+ *       Either we can (a) remove all stats on that column, (b) remove
+ *       the column from defined stats and force rebuild, (c) remove the
+ *       column on next ANALYZE. Or maybe something else?
+ */
 void
 update_mv_stats(Oid mvoid,
-				MVDependencies dependencies, MCVList mcvlist,
+				MVDependencies dependencies, MCVList mcvlist, MVHistogram histogram,
 				int2vector *attrs, VacAttrStats **stats)
 {
 	HeapTuple	stup,
@@ -213,19 +241,31 @@ update_mv_stats(Oid mvoid,
 		values[Anum_pg_mv_statistic_stamcv  - 1] = PointerGetDatum(data);
 	}
 
+	if (histogram != NULL)
+	{
+		bytea * data = serialize_mv_histogram(histogram, attrs, stats);
+		nulls[Anum_pg_mv_statistic_stahist-1]    = (data == NULL);
+		values[Anum_pg_mv_statistic_stahist - 1]
+			= PointerGetDatum(data);
+	}
+
 	/* always replace the value (either by bytea or NULL) */
 	replaces[Anum_pg_mv_statistic_stadeps -1] = true;
 	replaces[Anum_pg_mv_statistic_stamcv -1] = true;
+	replaces[Anum_pg_mv_statistic_stahist-1] = true;
 
 	/* always change the availability flags */
 	nulls[Anum_pg_mv_statistic_deps_built -1] = false;
 	nulls[Anum_pg_mv_statistic_mcv_built -1] = false;
+	nulls[Anum_pg_mv_statistic_hist_built-1] = false;
 
 	replaces[Anum_pg_mv_statistic_deps_built-1] = true;
 	replaces[Anum_pg_mv_statistic_mcv_built  -1] = true;
+	replaces[Anum_pg_mv_statistic_hist_built -1] = true;
 
 	values[Anum_pg_mv_statistic_deps_built-1] = BoolGetDatum(dependencies != NULL);
 	values[Anum_pg_mv_statistic_mcv_built  -1] = BoolGetDatum(mcvlist != NULL);
+	values[Anum_pg_mv_statistic_hist_built -1] = BoolGetDatum(histogram != NULL);
 
 	/* Is there already a pg_mv_statistic tuple for this attribute? */
 	oldtup = SearchSysCache1(MVSTATOID,
@@ -302,25 +342,6 @@ compare_scalars_partition(const void *a, const void *b, void *arg)
 	return ApplySortComparator(da, false, db, false, ssup);
 }
 
-/*
- * qsort_arg comparator for sorting Datum[] (row of Datums) when
- * counting distinct values.
- */
-int
-compare_scalars_memcmp(const void *a, const void *b, void *arg)
-{
-	Size		len = *(Size*)arg;
-
-	return memcmp(a, b, len);
-}
-
-int
-compare_scalars_memcmp_2(const void *a, const void *b)
-{
-	return memcmp(a, b, sizeof(Datum));
-}
-
-
 /* initialize multi-dimensional sort */
 MultiSortSupport
 multi_sort_init(int ndims)
diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h
index fca2782..f4309f7 100644
--- a/src/backend/utils/mvstats/common.h
+++ b/src/backend/utils/mvstats/common.h
@@ -47,18 +47,6 @@ typedef struct
 	int			tupno;			/* position index for tuple it came from */
 } ScalarItem;
 
-typedef struct
-{
-	int			count;			/* # of duplicates */
-	int			first;			/* values[] index of first occurrence */
-} ScalarMCVItem;
-
-typedef struct
-{
-	SortSupport ssup;
-	int		   *tupnoLink;
-} CompareScalarsContext;
-
 /* (de)serialization info */
 typedef struct DimensionInfo {
 	int		nvalues;	/* number of deduplicated values */
@@ -94,5 +82,3 @@ int multi_sort_compare_dim(int dim, const SortItem *a,
 int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
 int compare_scalars_simple(const void *a, const void *b, void *arg);
 int compare_scalars_partition(const void *a, const void *b, void *arg);
-int compare_scalars_memcmp(const void *a, const void *b, void *arg);
-int compare_scalars_memcmp_2(const void *a, const void *b);
diff --git a/src/backend/utils/mvstats/histogram.c b/src/backend/utils/mvstats/histogram.c
new file mode 100644
index 0000000..3acbea2
--- /dev/null
+++ b/src/backend/utils/mvstats/histogram.c
@@ -0,0 +1,1778 @@
+/*-------------------------------------------------------------------------
+ *
+ * histogram.c
+ *	  POSTGRES multivariate histograms
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/histogram.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "common.h"
+
+/*
+ * Multivariate histograms
+ *
+ * Histograms are a collection of buckets, represented by n-dimensional
+ * rectangles. Each rectangle is delimited by a min/max value in each
+ * dimension, stored in an array, so that the bucket includes values
+ * fulfilling condition
+ * 
+ *     min[i] <= value[i] <= max[i]
+ *
+ * where 'i' is the dimension. In 1D this corresponds to a simple
+ * interval, in 2D to a rectangle, and in 3D to a block. If you can
+ * imagine this in 4D, congrats!
+ *
+ * In addition to the boundaries, each bucket tracks additional details:
+ *
+ *     * frequency (fraction of tuples it matches)
+ *     * whether the boundaries are inclusive or exclusive
+ *     * whether the dimension contains only NULL values
+ *     * number of distinct values in each dimension (for building)
+ *
+ * and possibly some additional information.
+ *
+ * We do expect to support multiple histogram types, with different
+ * features etc. The 'type' field is used to identify those types.
+ * Technically some histogram types might use completely different
+ * bucket representation, but that's not expected at the moment.
+ *
+ * Although the current implementation builds non-overlapping buckets,
+ * the code does not rely on the non-overlapping nature - there are
+ * interesting types of histograms / histogram building algorithms
+ * producing overlapping buckets.
+ *
+ * TODO Currently the histogram does not include information about what
+ *      part of the table it covers (because the frequencies are
+ *      computed from the rows that may be filtered by MCV list). Seems
+ *      wrong, possibly causing misestimates (when not matching the MCV
+ *      list, we'll probably get much higher selectivity).
+ *
+ *
+ * Estimating selectivity
+ * ----------------------
+ * With histograms, we always "match" a whole bucket, not individual
+ * rows (or values), irrespective of the type of clause. Therefore we
+ * can't use the optimizations for equality clauses, as in MCV lists.
+ *
+ * The current implementation uses histograms to estimate these types
+ * of clauses (think of WHERE conditions):
+ *
+ *  (a) equality clauses    WHERE (a = 1) AND (b = 2)
+ *  (b) inequality clauses  WHERE (a < 1) AND (b >= 2)
+ *
+ * It's possible to add more clauses, for example:
+ *
+ *  (a) NULL clauses        WHERE (a IS NULL) AND (b IS NOT NULL)
+ *  (b) multi-var clauses   WHERE (a > b)
+ *
+ * and so on. These are tasks for the future, not yet implemented.
+ *
+ * When used on low-cardinality data, histograms usually perform
+ * considerably worse than MCV lists (which are a good fit for this
+ * kind of data). This is especially true on categorical data, where
+ * ordering of the values is only loosely related to meaning of the
+ * data, as proper ordering is crucial for histograms.
+ *
+ * On high-cardinality data the histograms are usually a better choice,
+ * because MCV lists can't accurately represent the distribution.
+ *
+ * By evaluating a clause on a bucket, we may get one of three results:
+ *
+ *     (a) FULL_MATCH - The bucket definitely matches the clause.
+ *
+ *     (b) PARTIAL_MATCH - The bucket matches the clause, but not
+ *                         necessarily all the tuples it represents.
+ *
+ *     (c) NO_MATCH - The bucket definitely does not match the clause.
+ *
+ * This may be illustrated using a range [1, 5], which is essentially
+ * a 1D bucket. With clause
+ *
+ *     WHERE (a < 10) => FULL_MATCH (all range values are below
+ *                       10, so the whole bucket matches)
+ *
+ *     WHERE (a < 3)  => PARTIAL_MATCH (there may be values matching
+ *                       the clause, but we don't know how many)
+ *
+ *     WHERE (a < 0)  => NO_MATCH (all range values are above 1, so
+ *                       no values from the bucket match)
+ *
+ * Some clauses may produce only some of those results - for example
+ * equality clauses may never produce FULL_MATCH as we always hit only
+ * part of the bucket, not all the values. This results in less accurate
+ * estimates compared to MCV lists, where we can hit an MCV item exactly
+ * (an extreme case of that is 'full match').
+ *
+ * There are clauses that may not produce any PARTIAL_MATCH results.
+ * A nice example of that is 'IS [NOT] NULL' clause, which either
+ * matches the bucket completely (FULL_MATCH) or not at all (NO_MATCH),
+ * thanks to how the NULL-buckets are constructed.
+ *
+ * TODO The IS [NOT] NULL clause is not yet implemented, but should be
+ *      rather trivial to add.
+ *
+ * Computing the total selectivity estimate is trivial - simply sum
+ * selectivities from all the FULL_MATCH and PARTIAL_MATCH buckets, but
+ * multiply the PARTIAL_MATCH buckets by 0.5 to minimize average error.
+ *
+ *
+ * NULL handling
+ * -------------
+ * Buckets may not contain tuples with NULL and non-NULL values in
+ * a single dimension (attribute). To handle this, the histogram may
+ * contain NULL-buckets, i.e. buckets with one or more NULL-only
+ * dimensions.
+ *
+ * The maximum number of NULL-buckets is determined by the number of
+ * attributes the histogram is built on. For N-dimensional histogram,
+ * the maximum number of NULL-buckets is 2^N. So for 8 attributes
+ * (which is the current value of MVSTATS_MAX_DIMENSIONS), there may be
+ * up to 256 NULL-buckets.
+ *
+ * Those buckets are only built if needed - if there are no NULL values
+ * in the data, no such buckets are built.
+ *
+ *
+ * Serialization
+ * -------------
+ * After serialization, the histograms are marked with a 'magic' constant
+ * to make sure the bytea really is a histogram in serialized form.
+ * 
+ * FIXME info about deduplication
+ * 
+ * 
+ * TODO This structure is used both when building the histogram, and
+ *      then when using it to compute estimates. That's why the last
+ *      few elements are not used once the histogram is built.
+ *
+ *      Add pointer to 'private' data, meant for private data for
+ *      other algorithms for building the histogram. It also removes
+ *      the bogus / unnecessary fields.
+ *
+ * TODO The limit on number of buckets is quite arbitrary, aiming for
+ *      sufficient accuracy while still being fast. Probably should be
+ *      replaced with a dynamic limit dependent on statistics target,
+ *      number of attributes (dimensions) and statistics target
+ *      associated with the attributes. Also, this needs to be related
+ *      to the number of sampled rows, by either clamping it to a
+ *      reasonable number (after seeing the number of rows) or using
+ *      it when computing the number of rows to sample. Something like
+ *      10 rows per bucket seems reasonable.
+ *
+ * TODO Add MVSTAT_HIST_ROWS_PER_BUCKET tracking minimal number of
+ *      tuples per bucket (also, see the previous TODO).
+ *
+ * TODO We may replace the bool arrays with a suitably large data type
+ *      (say, uint16 or uint32) and get rid of the allocations. It's
+ *      unlikely we'll ever support more than 32 columns as that'd
+ *      result in poor precision, huge histograms (splitting each
+ *      dimension once would mean 2^32 buckets), and very expensive
+ *      estimation. MCVItem already does it this way.
+ *
+ *      Update: Actually, this is not 100% true, because we're splitting
+ *      a single bucket, not all the buckets at the same time. So each
+ *      split simply adds one new bucket, and we choose the bucket that
+ *      is most in need of a slit. So even with 32 columns this might
+ *      give reasonable accuracy, maybe? After 1000 splits we'll get
+ *      about 1001 buckets, and some may be quite large (if that area
+ *      has a low frequency of tuples).
+ *
+ *      There are other challenges though - e.g. with this many columns
+ *      it's more likely to reference both label/non-label columns,
+ *      which is rather quirky (especially with histograms).
+ *
+ *      However, while this would save some space for histograms built
+ *      on many columns, it won't save anything for up to 4 columns
+ *      (actually, on less than 3 columns it's probably wasteful).
+ *
+ * TODO Maybe the distinct stats (both for combination of all columns
+ *      and for combinations of various subsets of columns) should be
+ *      moved to a separate structure (next to histogram/MCV/...) to
+ *      make it useful even without a histogram computed etc.
+ */
+
+static MVBucket create_initial_mv_bucket(int numrows, HeapTuple *rows,
+										 int2vector *attrs,
+										 VacAttrStats **stats);
+
+static MVBucket select_bucket_to_partition(int nbuckets, MVBucket * buckets);
+
+static MVBucket partition_bucket(MVBucket bucket, int2vector *attrs,
+								 VacAttrStats **stats);
+
+static MVBucket copy_mv_bucket(MVBucket bucket, uint32 ndimensions);
+
+static void update_bucket_ndistinct(MVBucket bucket, int2vector *attrs,
+									VacAttrStats ** stats);
+
+static void update_dimension_ndistinct(MVBucket bucket, int dimension,
+									   int2vector *attrs,
+									   VacAttrStats ** stats,
+									   bool update_boundaries);
+
+static void create_null_buckets(MVHistogram histogram, int bucket_idx,
+								int2vector *attrs, VacAttrStats ** stats);
+
+static int bsearch_comparator(const void * a, const void * b);
+
+/*
+ * Each serialized bucket needs to store (in this order):
+ *
+ * - number of tuples     (float)
+ * - number of distinct   (float)
+ * - min inclusive flags  (ndim * sizeof(bool))
+ * - max inclusive flags  (ndim * sizeof(bool))
+ * - null dimension flags (ndim * sizeof(bool))
+ * - min boundary indexes (2 * ndim * sizeof(int32))
+ * - max boundary indexes (2 * ndim * sizeof(int32))
+ *
+ * So in total:
+ * 
+ *   ndim * (4 * sizeof(int32) + 3 * sizeof(bool)) +
+ *   2 * sizeof(float)
+ */
+#define BUCKET_SIZE(ndims)	\
+	(ndims * (4 * sizeof(int32) + 3 * sizeof(bool)) + 2 * sizeof(float))
+
+/* pointers into a flat serialized bucket of BUCKET_SIZE(n) bytes */
+#define BUCKET_NTUPLES(b)		((float*)b)
+#define BUCKET_NDISTINCT(b)		((float*)(b + sizeof(float)))
+#define BUCKET_MIN_INCL(b,n)	((bool*)(b + 2 * sizeof(float)))
+#define BUCKET_MAX_INCL(b,n)	(BUCKET_MIN_INCL(b,n) + n)
+#define BUCKET_NULLS_ONLY(b,n)	(BUCKET_MAX_INCL(b,n) + n)
+#define BUCKET_MIN_INDEXES(b,n)	((int32*)(BUCKET_NULLS_ONLY(b,n) + n))
+#define BUCKET_MAX_INDEXES(b,n)	((BUCKET_MIN_INDEXES(b,n) + n))
+
+/* some debugging methods */
+#ifdef MVSTATS_DEBUG
+static void print_mv_histogram_info(MVHistogram histogram);
+#endif
+
+/*
+ * Building a multivariate histogram. In short, it first creates a single
+ * bucket containing all the rows, and then repeatedly splits it by first
+ * searching for the bucket / dimension most in need of a split.
+ *
+ * The current criteria is rather simple, by looking at the number of
+ * distinct values (combination of column values for bucket, column
+ * values for a dimension). This is somewhat naive, but seems to work
+ * quite well. See the discussion at select_bucket_to_partition and
+ * partition_bucket for more details about alternative algorithms.
+ *
+ * So the current algorithm looks like this:
+ *
+ *     build NULL-buckets (create_null_buckets)
+ *
+ *     while [not reaching maximum number of buckets]
+ *
+ *         choose bucket to partition (max distinct combinations)
+ *             if no bucket to partition
+ *                 terminate the algorithm
+ *
+ *         choose bucket dimension to partition (max distinct values)
+ *             split the bucket into two buckets
+ */
+MVHistogram
+build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs,
+				   VacAttrStats **stats, int numrows_total)
+{
+	int i;
+	int ndistinct;
+	int numattrs = attrs->dim1;
+	int *ndistincts = (int*)palloc0(sizeof(int) * numattrs);
+
+	MVHistogram histogram = (MVHistogram)palloc0(sizeof(MVHistogramData));
+
+	HeapTuple * rows_copy = (HeapTuple*)palloc0(numrows * sizeof(HeapTuple));
+	memcpy(rows_copy, rows, sizeof(HeapTuple) * numrows);
+
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	histogram->ndimensions = numattrs;
+
+	histogram->magic = MVSTAT_HIST_MAGIC;
+	histogram->type  = MVSTAT_HIST_TYPE_BASIC;
+	histogram->nbuckets = 1;
+
+	/* create max buckets (better than repalloc for short-lived objects) */
+	histogram->buckets = (MVBucket*)palloc0(MVSTAT_HIST_MAX_BUCKETS * sizeof(MVBucket));
+
+	/* create the initial bucket, covering the whole sample set */
+	histogram->buckets[0]
+		= create_initial_mv_bucket(numrows, rows_copy, attrs, stats);
+
+	/*
+	 * We may use this to limit number of buckets too - there can never
+	 * be more than ndistinct buckets (or ndistinct/k if we require at
+	 * least k tuples per bucket).
+	 *
+	 * With NULL buckets it's a bit more complicated, because there may
+	 * be 2^ndims NULL buckets, and if each contains a single tuple then
+	 * there may be up to
+	 *
+	 *     (ndistinct - 2^ndims)/k + 2^ndims
+	 *
+	 * buckets. But of course, it may happen that (ndistinct < 2^ndims)
+	 * which needs to be checked.
+	 *
+	 * TODO Use this for alternative estimate of number of buckets.
+	 */
+	ndistinct = histogram->buckets[0]->ndistinct;
+
+	/*
+	 * The initial bucket may contain NULL values, so we have to create
+	 * buckets with NULL-only dimensions.
+	 *
+	 * FIXME We may need up to 2^ndims buckets - check that there are
+	 *       enough buckets (MVSTAT_HIST_MAX_BUCKETS >= 2^ndims).
+	 */
+	create_null_buckets(histogram, 0, attrs, stats);
+
+	/* keep the global ndistinct values */
+	for (i = 0; i < numattrs; i++)
+		ndistincts[i] = histogram->buckets[0]->ndistincts[i];
+
+	while (histogram->nbuckets < MVSTAT_HIST_MAX_BUCKETS)
+	{
+		MVBucket bucket = select_bucket_to_partition(histogram->nbuckets,
+													 histogram->buckets);
+
+		/* no more buckets to partition */
+		if (bucket == NULL)
+			break;
+
+		histogram->buckets[histogram->nbuckets]
+			= partition_bucket(bucket, attrs, stats);
+
+		histogram->nbuckets += 1;
+	}
+
+	/* finalize the frequencies etc. */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		int d;
+		histogram->buckets[i]->ntuples
+			= (histogram->buckets[i]->numrows * 1.0) / numrows_total;
+		histogram->buckets[i]->ndistinct
+			= (histogram->buckets[i]->ndistinct * 1.0) / ndistinct;
+
+		for (d = 0; d < numattrs; d++)
+			histogram->buckets[i]->ndistincts[d]
+				= (histogram->buckets[i]->ndistincts[d] * 1.0) / ndistincts[d];
+	}
+
+	pfree(ndistincts);
+
+	return histogram;
+}
+
+/* fetch the histogram (as a bytea) from the pg_mv_statistic catalog */
+bytea *
+fetch_mv_histogram(Oid mvoid)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	bytea	   *stahist = NULL;
+
+	/* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. */
+	ScanKeyInit(&skey,
+				ObjectIdAttributeNumber,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(mvoid));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticOidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+	{
+		bool isnull = false;
+		Datum hist  = SysCacheGetAttr(MVSTATOID, htup,
+								   Anum_pg_mv_statistic_stahist, &isnull);
+
+		Assert(!isnull);
+
+		stahist = DatumGetByteaP(hist);
+
+		break;
+	}
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/*
+	 * TODO Maybe save the histogram into relcache, as in RelationGetIndexList
+	 *      (which was used as an inspiration of this one)?.
+	 */
+
+	return stahist;
+}
+
+/* print some basic info about the histogram */
+Datum
+pg_mv_stats_histogram_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result;
+
+	MVHistogram hist = deserialize_mv_histogram(data);
+
+	result = palloc0(128);
+	snprintf(result, 128, "nbuckets=%d", hist->nbuckets);
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+
+/* used to pass context into bsearch() */
+static SortSupport ssup_private = NULL;
+
+/*
+ * Serialize the MV histogram into a bytea value. The basic algorithm
+ * is simple, and mostly mimics the MCV serialization:
+ *
+ * (1) perform deduplication for each attribute (separately)
+ *     (a) collect all (non-NULL) attribute values from all buckets
+ *     (b) sort the data (using 'lt' from VacAttrStats)
+ *     (c) remove duplicate values from the array
+ *
+ * (2) serialize the arrays into a bytea value
+ *
+ * (3) process all buckets
+ *     (a) replace min/max values with indexes into the arrays
+ *
+ * Each attribute has to be processed separately, because we're mixing
+ * different datatypes, and we don't know what equality means for them.
+ * We're also mixing pass-by-value and pass-by-ref types, and so on.
+ *
+ * We'll use 32-bit values for the indexes in step (3), although we
+ * could probably use just 16 bits as we don't allow more than 8k
+ * buckets in the histogram max_buckets (well, we might increase this
+ * to 16k and still fit into signed 16-bits). But let's be lazy and rely
+ * on the varlena compression to kick in. Most bytes will be 0x00,
+ * so it should work nicely.
+ *
+ *
+ * Deduplication in serialization
+ * ------------------------------
+ * The deduplication is very effective and important here, because every
+ * time we split a bucket, we keep all the boundary values, except for
+ * the dimension that was used for the split. Another way to look at
+ * this is that each split introduces 1 new value (the value used to do
+ * the split). A histogram with M buckets was created by (M-1) splits
+ * of the initial bucket, and each bucket has 2*N boundary values. So
+ * assuming the initial bucket does not have any 'collapsed' dimensions,
+ * the number of distinct values is
+ *
+ *     (2*N + (M-1))
+ *
+ * but the total number of boundary values is
+ *
+ *     2*N*M
+ *
+ * which is clearly much higher. For a histogram on two columns, with
+ * 1024 buckets, it's 1027 vs. 4096. Of course, we're not saving all
+ * the difference (because we'll use 32-bit indexes into the values).
+ * But with large values (e.g. stored as varlena), this saves a lot.
+ *
+ * An interesting feature is that the total number of distinct values
+ * does not really grow with the number of dimensions, except for the
+ * size of the initial bucket. After that it only depends on number of
+ * buckets (i.e. number of splits).
+ *
+ * XXX Of course this only holds for the current histogram building
+ *     algorithm. Algorithms doing the splits differently (e.g.
+ *     producing overlapping buckets) may behave differently.
+ *
+ * TODO This only confirms we can use the uint16 indexes. The worst
+ *      that could happen is if all the splits happened by a single
+ *      dimension. To exhaust the uint16 this would require ~64k
+ *      splits (needs to be reflected in MVSTAT_HIST_MAX_BUCKETS).
+ *
+ * TODO We don't need to use a separate boolean for each flag, instead
+ *      use a single char and set bits.
+ * 
+ * TODO We might get a bit better compression by considering the actual
+ *      data type length. The current implementation treats all data
+ *      types passed by value as requiring 8B, but for INT it's actually
+ *      just 4B etc.
+ *
+ *      OTOH this is only related to the lookup table, and most of the
+ *      space is occupied by the buckets (with int16 indexes).
+ *
+ *
+ * Varlena compression
+ * -------------------
+ * This encoding may prevent automatic varlena compression (similarly
+ * to JSONB), because first part of the serialized bytea will be an
+ * array of unique values (although sorted), and pglz decides whether
+ * to compress by trying to compress the first part (~1kB or so). Which
+ * is likely to be poor, due to the lack of repetition.
+ *
+ * One possible cure to that might be storing the buckets first, and
+ * then the deduplicated arrays. The buckets might be better suited
+ * for compression.
+ *
+ * On the other hand the encoding scheme is a context-aware compression,
+ * usually compressing to ~30% (or less, with large data types). So the
+ * lack of pglz compression may be OK.
+ *
+ * XXX But maybe we don't really want to compress this, to save on
+ *     planning time?
+ *
+ * TODO Try storing the buckets / deduplicated arrays in reverse order,
+ *      measure impact on compression.
+ *
+ *
+ * Deserialization
+ * ---------------
+ * The deserialization is currently implemented so that it reconstructs
+ * the histogram back into the same structures - this involves quite
+ * a few of memcpy() and palloc(), but maybe we could create a special
+ * structure for the serialized histogram, and access the data directly,
+ * without the unpacking.
+ *
+ * Not only it would save some memory and CPU time, but might actually
+ * work better with CPU caches (not polluting the caches).
+ *
+ * TODO Try to keep the compressed form, instead of deserializing it to
+ *      MVHistogram/MVBucket.
+ *
+ *
+ * General TODOs
+ * -------------
+ * FIXME This probably leaks memory, or at least uses it inefficiently
+ *       (many small palloc() calls instead of a large one).
+ *
+ * FIXME This probably leaks memory, or at least uses it inefficiently
+ *       (many small palloc() calls instead of a large one).
+ *
+ * TODO Consider packing boolean flags (NULL) for each item into 'char'
+ *      or a longer type (instead of using an array of bool items).
+ */
+bytea *
+serialize_mv_histogram(MVHistogram histogram, int2vector *attrs,
+					   VacAttrStats **stats)
+{
+	int i = 0, j = 0;
+	Size	total_length = 0;
+
+	bytea  *output = NULL;
+	char   *data = NULL;
+
+	int		nbuckets = histogram->nbuckets;
+	int		ndims    = histogram->ndimensions;
+
+	/* allocated for serialized bucket data */
+	int		bucketsize = BUCKET_SIZE(ndims);
+	char   *bucket = palloc0(bucketsize);
+
+	/* values per dimension (and number of non-NULL values) */
+	Datum **values = (Datum**)palloc0(sizeof(Datum*) * ndims);
+	int	   *counts = (int*)palloc0(sizeof(int) * ndims);
+
+	/* info about dimensions (for deserialize) */
+	DimensionInfo * info
+				= (DimensionInfo *)palloc0(sizeof(DimensionInfo)*ndims);
+
+	/* sort support data */
+	SortSupport	ssup = (SortSupport)palloc0(sizeof(SortSupportData)*ndims);
+
+	/* collect and deduplicate values for each dimension separately */
+	for (i = 0; i < ndims; i++)
+	{
+		int count;
+		StdAnalyzeData *tmp = (StdAnalyzeData *)stats[i]->extra_data;
+
+		/* keep important info about the data type */
+		info[i].typlen   = stats[i]->attrtype->typlen;
+		info[i].typbyval = stats[i]->attrtype->typbyval;
+
+		/*
+		 * Allocate space for all min/max values, including NULLs
+		 * (we won't use them, but we don't know how many are there),
+		 * and then collect all non-NULL values.
+		 */
+		values[i] = (Datum*)palloc0(sizeof(Datum) * nbuckets * 2);
+
+		for (j = 0; j < histogram->nbuckets; j++)
+		{
+			/* skip buckets where this dimension is NULL-only */
+			if (! histogram->buckets[j]->nullsonly[i])
+			{
+				values[i][counts[i]] = histogram->buckets[j]->min[i];
+				counts[i] += 1;
+
+				values[i][counts[i]] = histogram->buckets[j]->max[i];
+				counts[i] += 1;
+			}
+		}
+
+		/* there are just NULL values in this dimension */
+		if (counts[i] == 0)
+			continue;
+
+		/* sort and deduplicate */
+		ssup[i].ssup_cxt = CurrentMemoryContext;
+		ssup[i].ssup_collation = DEFAULT_COLLATION_OID;
+		ssup[i].ssup_nulls_first = false;
+
+		PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup[i]);
+
+		qsort_arg(values[i], counts[i], sizeof(Datum),
+										compare_scalars_simple, &ssup[i]);
+
+		/*
+		 * Walk through the array and eliminate duplicate values, but
+		 * keep the ordering (so that we can do bsearch later). We know
+		 * there's at least 1 item, so we can skip the first element.
+		 */
+		count = 1;	/* number of deduplicated items */
+		for (j = 1; j < counts[i]; j++)
+		{
+			/* if it's different from the previous value, we need to keep it */
+			if (compare_datums_simple(values[i][j-1], values[i][j], &ssup[i]) != 0)
+			{
+				/* XXX: not needed if (count == j) */
+				values[i][count] = values[i][j];
+				count += 1;
+			}
+		}
+
+		/* keep info about the deduplicated count */
+		info[i].nvalues = count;
+
+		/* compute size of the serialized data */
+		if (info[i].typbyval)
+			/*
+			 * passed by value, so just Datum array (int4, int8, ...)
+			 *
+			 * TODO Might save a few bytes here, by storing just typlen
+			 *      bytes instead of whole Datum (8B) on 64-bits.
+			 */
+			info[i].nbytes = info[i].nvalues * sizeof(Datum);
+		else if (info[i].typlen > 0)
+			/* passed by reference, but fixed length (name, tid, ...) */
+			info[i].nbytes = info[i].nvalues * info[i].typlen;
+		else if (info[i].typlen == -1)
+			/* varlena, so just use VARSIZE_ANY */
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += VARSIZE_ANY(values[i][j]);
+		else if (info[i].typlen == -2)
+			/* cstring, so simply strlen */
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += strlen(DatumGetPointer(values[i][j]));
+		else
+			elog(ERROR, "unknown data type typbyval=%d typlen=%d",
+				info[i].typbyval, info[i].typlen);
+	}
+
+	/*
+	 * Now we finally know how much space we'll need for the serialized
+	 * histogram, as it contains these fields:
+	 *
+	 * - length (4B) for varlena
+	 * - magic (4B)
+	 * - type (4B)
+	 * - ndimensions (4B)
+	 * - nbuckets (4B)
+	 * - info (ndim * sizeof(DimensionInfo)
+	 * - arrays of values for each dimension
+	 * - serialized buckets (nbuckets * bucketsize)
+	 *
+	 * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and
+	 * then we'll place the data (and buckets).
+	 */
+	total_length = (sizeof(int32) + offsetof(MVHistogramData, buckets)
+					+ ndims * sizeof(DimensionInfo)
+					+ nbuckets * bucketsize);
+
+	/* account for the deduplicated data */
+	for (i = 0; i < ndims; i++)
+		total_length += info[i].nbytes;
+
+	/* enforce arbitrary limit of 1MB */
+	if (total_length > 1024 * 1024)
+		elog(ERROR, "serialized histogram exceeds 1MB (%ld)", total_length);
+
+	/* allocate space for the serialized histogram list, set header */
+	output = (bytea*)palloc0(total_length);
+	SET_VARSIZE(output, total_length);
+
+	/* we'll use 'data' to keep track of the place to write data */
+	data = VARDATA(output);
+
+	memcpy(data, histogram, offsetof(MVHistogramData, buckets));
+	data += offsetof(MVHistogramData, buckets);
+
+	memcpy(data, info, sizeof(DimensionInfo) * ndims);
+	data += sizeof(DimensionInfo) * ndims;
+
+	/* value array for each dimension */
+	for (i = 0; i < ndims; i++)
+	{
+#ifdef USE_ASSERT_CHECKING
+		char *tmp = data;
+#endif
+		for (j = 0; j < info[i].nvalues; j++)
+		{
+			if (info[i].typbyval)
+			{
+				/* passed by value / Datum */
+				memcpy(data, &values[i][j], sizeof(Datum));
+				data += sizeof(Datum);
+			}
+			else if (info[i].typlen > 0)
+			{
+				/* passed by reference, but fixed length (name, tid, ...) */
+				memcpy(data, &values[i][j], info[i].typlen);
+				data += info[i].typlen;
+			}
+			else if (info[i].typlen == -1)
+			{
+				/* varlena */
+				memcpy(data, DatumGetPointer(values[i][j]),
+							VARSIZE_ANY(values[i][j]));
+				data += VARSIZE_ANY(values[i][j]);
+			}
+			else if (info[i].typlen == -2)
+			{
+				/* cstring (don't forget the \0 terminator!) */
+				memcpy(data, DatumGetPointer(values[i][j]),
+							strlen(DatumGetPointer(values[i][j])) + 1);
+				data += strlen(DatumGetPointer(values[i][j])) + 1;
+			}
+		}
+		Assert((data - tmp) == info[i].nbytes);
+	}
+
+	/* and finally, the histogram buckets */
+	for (i = 0; i < nbuckets; i++)
+	{
+		/* don't write beyond the allocated space */
+		Assert(data <= (char*)output + total_length - bucketsize);
+
+		/* reset the values for each item */
+		memset(bucket, 0, bucketsize);
+
+		*BUCKET_NTUPLES(bucket)   = histogram->buckets[i]->ntuples;
+		*BUCKET_NDISTINCT(bucket) = histogram->buckets[i]->ndistinct;
+
+		for (j = 0; j < ndims; j++)
+		{
+			/* do the lookup only for non-NULL values */
+			if (! histogram->buckets[i]->nullsonly[j])
+			{
+				int idx;
+				Datum * v = NULL;
+				ssup_private = &ssup[j];
+
+				/* min boundary */
+				v = (Datum*)bsearch(&histogram->buckets[i]->min[j],
+								values[j], info[j].nvalues, sizeof(Datum),
+								bsearch_comparator);
+
+				if (v == NULL)
+					elog(ERROR, "value for dim %d not found in array", j);
+
+				/* compute index within the array */
+				idx = (v - values[j]);
+
+				Assert((idx >= 0) && (idx < info[j].nvalues));
+
+				BUCKET_MIN_INDEXES(bucket, ndims)[j] = idx;
+
+				/* max boundary */
+				v = (Datum*)bsearch(&histogram->buckets[i]->max[j],
+								values[j], info[j].nvalues, sizeof(Datum),
+								bsearch_comparator);
+
+				if (v == NULL)
+					elog(ERROR, "value for dim %d not found in array", j);
+
+				/* compute index within the array */
+				idx = (v - values[j]);
+
+				Assert((idx >= 0) && (idx < info[j].nvalues));
+
+				BUCKET_MAX_INDEXES(bucket, ndims)[j] = idx;
+			}
+		}
+
+		/* copy flags (nulls, min/max inclusive) */
+		memcpy(BUCKET_NULLS_ONLY(bucket, ndims),
+				histogram->buckets[i]->nullsonly, sizeof(bool) * ndims);
+
+		memcpy(BUCKET_MIN_INCL(bucket, ndims),
+				histogram->buckets[i]->min_inclusive, sizeof(bool) * ndims);
+
+		memcpy(BUCKET_MAX_INCL(bucket, ndims),
+				histogram->buckets[i]->max_inclusive, sizeof(bool) * ndims);
+
+		/* copy the item into the array */
+		memcpy(data, bucket, bucketsize);
+
+		data += bucketsize;
+	}
+
+	/* at this point we expect to match the total_length exactly */
+	Assert((data - (char*)output) == total_length);
+
+	/* FIXME free the values/counts arrays here */
+
+	return output;
+}
+
+/*
+ * Reverse to serialize histogram. This essentially expands the serialized
+ * form back to MVHistogram / MVBucket.
+ *
+ * Returns NULL for a NULL input. Corrupted or truncated data is reported
+ * with elog(ERROR) - the size is validated in several steps, because the
+ * total size depends on the deduplicated value arrays, whose lengths only
+ * become known while parsing the DimensionInfo records.
+ */
+MVHistogram
+deserialize_mv_histogram(bytea * data)
+{
+	int i = 0, j = 0;
+
+	Size	expected_size;
+	char   *tmp = NULL;
+	Datum **values = NULL;
+
+	MVHistogram histogram;
+	DimensionInfo *info;
+
+	int		nbuckets;
+	int		ndims;
+	int		bucketsize;
+
+	if (data == NULL)
+		return NULL;
+
+	/* we need at least space for the histogram header */
+	if (VARSIZE_ANY_EXHDR(data) < offsetof(MVHistogramData,buckets))
+		elog(ERROR, "invalid histogram size %ld (expected at least %ld)",
+			 VARSIZE_ANY_EXHDR(data), offsetof(MVHistogramData,buckets));
+
+	/* read the histogram header */
+	histogram = (MVHistogram)palloc0(sizeof(MVHistogramData));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA(data);
+
+	/* get the header and perform basic sanity checks */
+	memcpy(histogram, tmp, offsetof(MVHistogramData, buckets));
+	tmp += offsetof(MVHistogramData, buckets);
+
+	if (histogram->magic != MVSTAT_HIST_MAGIC)
+		elog(ERROR, "invalid histogram magic %d (expected %d)",
+			 histogram->magic, MVSTAT_HIST_MAGIC);
+
+	if (histogram->type != MVSTAT_HIST_TYPE_BASIC)
+		elog(ERROR, "invalid histogram type %d (expected %d)",
+			 histogram->type, MVSTAT_HIST_TYPE_BASIC);
+
+	nbuckets = histogram->nbuckets;
+	ndims    = histogram->ndimensions;
+	bucketsize = BUCKET_SIZE(ndims);
+
+	Assert((nbuckets > 0) && (nbuckets <= MVSTAT_HIST_MAX_BUCKETS));
+	Assert((ndims >= 2) && (ndims <= MVSTATS_MAX_DIMENSIONS));
+
+	/*
+	 * What size do we expect with those parameters (it's incomplete,
+	 * as we yet have to count the array sizes (from DimensionInfo
+	 * records).
+	 */
+	expected_size = offsetof(MVHistogramData,buckets) +
+					ndims * sizeof(DimensionInfo) +
+					(nbuckets * bucketsize);
+
+	/* check that we have at least the DimensionInfo records */
+	if (VARSIZE_ANY_EXHDR(data) < expected_size)
+		elog(ERROR, "invalid histogram size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	info = (DimensionInfo*)(tmp);
+	tmp += ndims * sizeof(DimensionInfo);
+
+	/* account for the value arrays */
+	for (i = 0; i < ndims; i++)
+		expected_size += info[i].nbytes;
+
+	/* now the size is fully known, so check it exactly */
+	if (VARSIZE_ANY_EXHDR(data) != expected_size)
+		elog(ERROR, "invalid histogram size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	/* looks OK - not corrupted or something */
+
+	/* let's parse the value arrays (one deduplicated array per dimension) */
+	values = (Datum**)palloc0(sizeof(Datum*) * ndims);
+
+	/*
+	 * FIXME This uses pointers to the original data array (the types
+	 *       not passed by value), so when someone frees the memory,
+	 *       e.g. by doing something like this:
+	 *
+	 *           bytea * data = ... fetch the data from catalog ...
+	 *           MCVList mcvlist = deserialize_mcv_list(data);
+	 *           pfree(data);
+	 *
+	 *       then 'mcvlist' references the freed memory. This needs to
+	 *       copy the pieces.
+	 *
+	 * TODO same as in MCV deserialization / consider moving to common.c
+	 */
+	for (i = 0; i < ndims; i++)
+	{
+		if (info[i].typbyval)
+		{
+			/* passed by value / Datum - simply reuse the array */
+			values[i] = (Datum*)tmp;
+			tmp += info[i].nbytes;
+		}
+		else if (info[i].typlen > 0)
+		{
+			/* passed by reference, but fixed length (name, tid, ...) */
+			values[i] = (Datum*)palloc0(sizeof(Datum) * info[i].nvalues);
+			for (j = 0; j < info[i].nvalues; j++)
+			{
+				/* just point into the array */
+				values[i][j] = PointerGetDatum(tmp);
+				tmp += info[i].typlen;
+			}
+		}
+		else if (info[i].typlen == -1)
+		{
+			/* varlena */
+			values[i] = (Datum*)palloc0(sizeof(Datum) * info[i].nvalues);
+			for (j = 0; j < info[i].nvalues; j++)
+			{
+				/* just point into the array */
+				values[i][j] = PointerGetDatum(tmp);
+				tmp += VARSIZE_ANY(tmp);
+			}
+		}
+		else if (info[i].typlen == -2)
+		{
+			/* cstring */
+			values[i] = (Datum*)palloc0(sizeof(Datum) * info[i].nvalues);
+			for (j = 0; j < info[i].nvalues; j++)
+			{
+				/* just point into the array */
+				values[i][j] = PointerGetDatum(tmp);
+				tmp += (strlen(tmp) + 1); /* don't forget the \0 */
+			}
+		}
+	}
+
+	/* allocate space for the buckets */
+	histogram->buckets = (MVBucket*)palloc0(sizeof(MVBucket) * nbuckets);
+
+	/*
+	 * Rebuild the buckets - the serialized form stores indexes into the
+	 * deduplicated value arrays, so translate them back to Datum values.
+	 */
+	for (i = 0; i < nbuckets; i++)
+	{
+		MVBucket bucket = (MVBucket)palloc0(sizeof(MVBucketData));
+
+		bucket->nullsonly     = (bool*) palloc0(sizeof(bool) * ndims);
+		bucket->min_inclusive = (bool*) palloc0(sizeof(bool) * ndims);
+		bucket->max_inclusive = (bool*) palloc0(sizeof(bool) * ndims);
+
+		bucket->min = (Datum*) palloc0(sizeof(Datum) * ndims);
+		bucket->max = (Datum*) palloc0(sizeof(Datum) * ndims);
+
+		bucket->ntuples   = *BUCKET_NTUPLES(tmp);
+		bucket->ndistinct = *BUCKET_NDISTINCT(tmp);
+
+		memcpy(bucket->nullsonly, BUCKET_NULLS_ONLY(tmp, ndims),
+				sizeof(bool) * ndims);
+
+		memcpy(bucket->min_inclusive, BUCKET_MIN_INCL(tmp, ndims),
+				sizeof(bool) * ndims);
+
+		memcpy(bucket->max_inclusive, BUCKET_MAX_INCL(tmp, ndims),
+				sizeof(bool) * ndims);
+
+		/* translate the indexes to values (NULL-only dimensions have none) */
+		for (j = 0; j < ndims; j++)
+		{
+			if (! bucket->nullsonly[j])
+			{
+				bucket->min[j] = values[j][BUCKET_MIN_INDEXES(tmp, ndims)[j]];
+				bucket->max[j] = values[j][BUCKET_MAX_INDEXES(tmp, ndims)[j]];
+			}
+		}
+
+		histogram->buckets[i] = bucket;
+
+		Assert(tmp <= (char*)data + VARSIZE_ANY(data));
+
+		tmp += bucketsize;
+	}
+
+	/* at this point we expect to match the total_length exactly */
+	Assert((tmp - VARDATA(data)) == expected_size);
+
+	return histogram;
+}
+
+/*
+ * Build the initial (single) bucket covering the whole sample. The
+ * histogram is then built by repeatedly splitting this bucket into
+ * smaller and smaller pieces.
+ *
+ * TODO Add ndistinct estimation, probably the one described in "Towards
+ *      Estimation Error Guarantees for Distinct Values, PODS 2000,
+ *      p. 268-279" (the ones called GEE, or maybe AE).
+ *
+ * TODO The "combined" ndistinct is more likely to scale with the number
+ *      of rows (in the table), because a single column behaving this
+ *      way is sufficient for such behavior.
+ */
+static MVBucket
+create_initial_mv_bucket(int numrows, HeapTuple *rows, int2vector *attrs,
+						 VacAttrStats **stats)
+{
+	int		dim;
+	int		numattrs = attrs->dim1;
+
+	/* TODO allocate bucket as a single piece, including all the fields. */
+	MVBucket	bucket = (MVBucket) palloc0(sizeof(MVBucketData));
+
+	Assert(numrows > 0);
+	Assert(rows != NULL);
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	/* per-dimension distinct counts and NULL-only flags */
+	bucket->ndistincts = (uint32 *) palloc0(numattrs * sizeof(uint32));
+	bucket->nullsonly = (bool *) palloc0(numattrs * sizeof(bool));
+
+	/* whether the lower/upper boundaries are inclusive */
+	bucket->min_inclusive = (bool *) palloc0(numattrs * sizeof(bool));
+	bucket->max_inclusive = (bool *) palloc0(numattrs * sizeof(bool));
+
+	/* the lower/upper boundary values themselves */
+	bucket->min = (Datum *) palloc0(numattrs * sizeof(Datum));
+	bucket->max = (Datum *) palloc0(numattrs * sizeof(Datum));
+
+	/* the initial bucket covers the complete sample */
+	bucket->numrows = numrows;
+	bucket->ntuples = numrows;
+	bucket->rows = rows;
+
+	/*
+	 * Compute the combined ndistinct (used when choosing which bucket to
+	 * split next) and the per-dimension ndistinct values (used when
+	 * choosing which dimension of the chosen bucket to split). For the
+	 * initial bucket this also initializes the min/max boundaries.
+	 */
+	update_bucket_ndistinct(bucket, attrs, stats);
+
+	for (dim = 0; dim < numattrs; dim++)
+		update_dimension_ndistinct(bucket, dim, attrs, stats, true);
+
+	/*
+	 * No split has happened yet, so the round-robin dimension selection
+	 * starts from index 0 in the first round.
+	 */
+	bucket->last_split_dimension = -1;
+
+	return bucket;
+}
+
+/*
+ * Print basic information about the histogram buckets (debug builds only,
+ * guarded by MVSTATS_DEBUG below).
+ *
+ * TODO Fix to handle arbitrarily-sized histograms (not just 2D ones)
+ *      and call the right output procedures (for the particular type).
+ *
+ * TODO This should somehow fetch info about the data types, and use
+ *      the appropriate output functions to print the boundary values.
+ *      Right now this prints the 8B value as an integer.
+ *
+ * TODO Also, provide a special function for 2D histogram, printing
+ *      a gnuplot script (with rectangles).
+ *
+ * TODO For string types (once supported) we can sort the strings first,
+ *      assign them a sequence of integers and use the original values
+ *      as labels.
+ */
+#ifdef MVSTATS_DEBUG
+static void
+print_mv_histogram_info(MVHistogram histogram)
+{
+	int i = 0;
+
+	elog(WARNING, "histogram nbuckets=%d", histogram->nbuckets);
+
+	/*
+	 * NOTE(review): the format string hard-codes two dimensions (it only
+	 * prints min/max/ndistinct for indexes 0 and 1) and prints the Datum
+	 * boundary values as raw integers - see the TODOs above. Also note
+	 * it prints bucket->numrows (sample rows) under the 'ntuples' label,
+	 * and '%f' assumes bucket->ndistinct is a floating-point field -
+	 * confirm against MVBucketData.
+	 */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		MVBucket bucket = histogram->buckets[i];
+		elog(WARNING, "  bucket %d : ndistinct=%f ntuples=%d min=[%ld, %ld], max=[%ld, %ld] distinct=[%d,%d]",
+			i, bucket->ndistinct, bucket->numrows,
+			bucket->min[0], bucket->min[1], bucket->max[0], bucket->max[1],
+			bucket->ndistincts[0], bucket->ndistincts[1]);
+	}
+}
+#endif
+
+/*
+ * A very simple partitioning selection criteria - pick the bucket with
+ * the highest number of distinct value combinations.
+ *
+ * Returns a pointer to the chosen bucket, or NULL when no bucket can be
+ * split (i.e. every bucket contains just a single distinct combination).
+ *
+ * TODO Consider other partitioning criteria (v-optimal, maxdiff etc.).
+ *
+ * TODO Allowing the bucket to degenerate to a single combination of
+ *      values makes it rather strange MCV list. Maybe we should use
+ *      higher lower boundary, or maybe make the selection criteria
+ *      more complex (e.g. consider number of rows in the bucket, etc.).
+ *
+ *      That however is different from buckets 'degenerated' only for
+ *      some dimensions (e.g. half of them), which is perfectly
+ *      appropriate for statistics on a combination of low and high
+ *      cardinality columns.
+ */
+static MVBucket
+select_bucket_to_partition(int nbuckets, MVBucket * buckets)
+{
+	int			i;
+	int			max_ndistinct = 1;	/* ndistinct=1 means unsplittable */
+	MVBucket	chosen = NULL;
+
+	for (i = 0; i < nbuckets; i++)
+	{
+		/* keep the bucket with the highest distinct count so far */
+		if (buckets[i]->ndistinct > max_ndistinct)
+		{
+			max_ndistinct = buckets[i]->ndistinct;
+			chosen = buckets[i];
+		}
+	}
+
+	/* may be NULL if there are no buckets with (ndistinct > 1) */
+	return chosen;
+}
+
+/*
+ * A simple bucket partitioning implementation - splits the dimensions in
+ * a round-robin manner (considering only those with ndistinct>1). That
+ * is first a dimension 0 is split, then 1, 2, ... until reaching the
+ * end of attribute list, and then wrapping back to 0. Of course,
+ * dimensions with a single distinct value are skipped.
+ *
+ * This is essentially what Muralikrishna/DeWitt described in their SIGMOD
+ * article (M. Muralikrishna, David J. DeWitt: Equi-Depth Histograms For
+ * Estimating Selectivity Factors For Multi-Dimensional Queries. SIGMOD
+ * Conference 1988: 28-36).
+ *
+ * There are multiple histogram options, centered around the partitioning
+ * criteria, specifying both how to choose a bucket and the dimension
+ * most in need of a split. For a nice summary and general overview, see
+ * "rK-Hist : an R-Tree based histogram for multi-dimensional selectivity
+ * estimation" thesis by J. A. Lopez, Concordia University, p.34-37 (and
+ * possibly p. 32-34 for explanation of the terms).
+ *
+ * This splits the bucket by tweaking the existing one, and returning the
+ * new bucket (essentially shrinking the existing one in-place and returning
+ * the other "half" as a new bucket). The caller is responsible for adding
+ * the new bucket into the list of buckets.
+ *
+ * TODO It requires care to prevent splitting only one dimension and not
+ *      splitting another one at all (which might happen easily in case of
+ *      strongly dependent columns - e.g. y=x).
+ *
+ * TODO Should probably consider statistics target for the columns (e.g. to
+ *      split dimensions with higher statistics target more frequently).
+ */
+static MVBucket
+partition_bucket(MVBucket bucket, int2vector *attrs,
+				 VacAttrStats **stats)
+{
+	int i;
+	int dimension;
+	int numattrs = attrs->dim1;
+
+	Datum split_value;
+	MVBucket new_bucket;
+
+	/* needed for sort, when looking for the split value */
+	bool isNull;
+	int nvalues = 0;
+	StdAnalyzeData * mystats = NULL;
+	ScalarItem * values = (ScalarItem*)palloc0(bucket->numrows * sizeof(ScalarItem));
+	SortSupportData ssup;
+
+	/* looking for the split value */
+	int ndistinct = 1;	/* number of distinct values below current value */
+	int nrows = 1;		/* number of rows below current value */
+
+	/* needed when splitting the values */
+	HeapTuple * oldrows = bucket->rows;
+	int oldnrows = bucket->numrows;
+
+	/*
+	 * We can't split buckets with a single distinct value (this also
+	 * disqualifies NULL-only dimensions). Also, there has to be multiple
+	 * sample rows (otherwise, how could there be more distinct values).
+	 */
+	Assert(bucket->ndistinct > 1);
+	Assert(bucket->numrows > 1);
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	/*
+	 * Look for the next dimension to split, in a round robin manner.
+	 * We'll use the first one with (ndistinct > 1).
+	 *
+	 * If we happen to wrap around, something clearly went wrong (we
+	 * can't mess with the last_split_dimension directly, because we
+	 * couldn't do this check).
+	 */
+	dimension = bucket->last_split_dimension;
+	while (true)
+	{
+		dimension = (dimension + 1) % numattrs;
+
+		if (bucket->ndistincts[dimension] > 1)
+			break;
+
+		/* if we ran the previous split dimension, it's infinite loop */
+		Assert(dimension != bucket->last_split_dimension);
+	}
+
+	/* Remember the dimension for the next split of this bucket. */
+	bucket->last_split_dimension = dimension;
+
+	/*
+	 * Walk through the selected dimension, collect and sort the values
+	 * and then choose the value to use as the new boundary.
+	 */
+	mystats = (StdAnalyzeData *) stats[dimension]->extra_data;
+
+	/* initialize sort support, etc. */
+	memset(&ssup, 0, sizeof(ssup));
+	ssup.ssup_cxt = CurrentMemoryContext;
+
+	/* We always use the default collation for statistics */
+	ssup.ssup_collation = DEFAULT_COLLATION_OID;
+	ssup.ssup_nulls_first = false;
+
+	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
+
+	for (i = 0; i < bucket->numrows; i++)
+	{
+		/* remember the index of the sample row, to make the partitioning simpler */
+		values[nvalues].value = heap_getattr(bucket->rows[i], attrs->values[dimension],
+											 stats[dimension]->tupDesc, &isNull);
+		values[nvalues].tupno = i;
+
+		/* no NULL values allowed here (we don't do splits by null-only dimensions) */
+		Assert(!isNull);
+
+		nvalues++;
+	}
+
+	/* sort the array of values */
+	qsort_arg((void *) values, nvalues, sizeof(ScalarItem),
+			  compare_scalars_partition, (void *) &ssup);
+
+	/*
+	 * We know there are bucket->ndistincts[dimension] distinct values
+	 * in this dimension, and we want to split this into half, so walk
+	 * through the array and stop once we see (ndistinct/2) values.
+	 *
+	 * We always choose the "next" value, i.e. (n/2+1)-th distinct value,
+	 * and use it as an exclusive upper boundary (and inclusive lower
+	 * boundary).
+	 *
+	 * TODO Maybe we should use "average" of the two middle distinct
+	 *      values (at least for even distinct counts), but that would
+	 *      require being able to do an average (which does not work
+	 *      for non-arithmetic types).
+	 *
+	 * TODO Another option is to look for a split that'd give about
+	 *      50% tuples (not distinct values) in each partition. That
+	 *      might work better when there are a few very frequent
+	 *      values, and many rare ones.
+	 *
+	 * FIXME The (values[i].value != values[i-1].value) comparison only
+	 *       detects distinct values reliably for pass-by-value types -
+	 *       presumably the same limitation as noted elsewhere in this
+	 *       file; confirm once by-reference types are supported.
+	 */
+	split_value = values[0].value;
+	for (i = 1; i < bucket->numrows; i++)
+	{
+		/* count distinct values */
+		if (values[i].value != values[i-1].value)
+			ndistinct += 1;
+
+		/* once we've seen 1/2 distinct values (and use the value) */
+		if (ndistinct > bucket->ndistincts[dimension] / 2)
+		{
+			split_value = values[i].value;
+			break;
+		}
+
+		/* keep track how many rows belong to the first bucket */
+		nrows += 1;
+	}
+
+	Assert(nrows > 0);
+	Assert(nrows < bucket->numrows);
+
+	/* create the new bucket as a (incomplete) copy of the one being partitioned. */
+	new_bucket = copy_mv_bucket(bucket, numattrs);
+
+	/*
+	 * Do the actual split of the chosen dimension, using the split value
+	 * as the upper bound for the existing bucket, and lower bound for the
+	 * new one.
+	 */
+	bucket->max[dimension]     = split_value;
+	new_bucket->min[dimension] = split_value;
+
+	/*
+	 * The split value is an exclusive upper boundary of the old bucket and
+	 * an inclusive lower boundary of the new one (the other boundaries,
+	 * copied by copy_mv_bucket, remain unchanged). The original code set
+	 * new_bucket->max_inclusive here, which clobbered the inherited upper
+	 * boundary inclusiveness instead of marking the new lower boundary.
+	 */
+	bucket->max_inclusive[dimension]		= false;
+	new_bucket->min_inclusive[dimension]	= true;
+
+	/*
+	 * Redistribute the sample tuples using the 'ScalarItem->tupno'
+	 * index. We know 'nrows' rows should remain in the original
+	 * bucket and the rest goes to the new one.
+	 */
+
+	bucket->rows     = (HeapTuple*)palloc0(nrows * sizeof(HeapTuple));
+	new_bucket->rows = (HeapTuple*)palloc0((oldnrows - nrows) * sizeof(HeapTuple));
+
+	bucket->numrows	 = nrows;
+	new_bucket->numrows = (oldnrows - nrows);
+
+	/*
+	 * The first nrows should go to the first bucket, the rest should
+	 * go to the new one. Use the tupno field to get the actual HeapTuple
+	 * row from the original array of sample rows.
+	 */
+	for (i = 0; i < nrows; i++)
+		memcpy(&bucket->rows[i], &oldrows[values[i].tupno], sizeof(HeapTuple));
+
+	for (i = nrows; i < oldnrows; i++)
+		memcpy(&new_bucket->rows[i-nrows], &oldrows[values[i].tupno], sizeof(HeapTuple));
+
+	/* update ndistinct values for the buckets (total and per dimension) */
+	update_bucket_ndistinct(bucket, attrs, stats);
+	update_bucket_ndistinct(new_bucket, attrs, stats);
+
+	/*
+	 * TODO We don't need to do this for the dimension we used for split,
+	 *      because we know how many distinct values went to each partition.
+	 */
+	for (i = 0; i < numattrs; i++)
+	{
+		update_dimension_ndistinct(bucket, i, attrs, stats, false);
+		update_dimension_ndistinct(new_bucket, i, attrs, stats, false);
+	}
+
+	pfree(oldrows);
+	pfree(values);
+
+	return new_bucket;
+}
+
+/*
+ * Create a copy of a histogram bucket, carrying over only the attributes
+ * that survive a split (boundaries, inclusiveness flags, NULL-only flags
+ * and the round-robin split position). Build-time state - sample rows,
+ * distinct counts - is left zeroed for the caller to recompute.
+ */
+static MVBucket
+copy_mv_bucket(MVBucket bucket, uint32 ndimensions)
+{
+	/* TODO allocate as a single piece (including all the fields) */
+	MVBucket	copy = (MVBucket) palloc0(sizeof(MVBucketData));
+
+	copy->last_split_dimension = bucket->last_split_dimension;
+
+	/* per-dimension arrays (distinct counts are recomputed after a split) */
+	copy->ndistincts = (uint32 *) palloc0(ndimensions * sizeof(uint32));
+	copy->nullsonly = (bool *) palloc0(ndimensions * sizeof(bool));
+
+	/* inclusiveness flags for the lower/upper bounds */
+	copy->min_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));
+	copy->max_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));
+
+	/* the lower/upper boundary values */
+	copy->min = (Datum *) palloc0(ndimensions * sizeof(Datum));
+	copy->max = (Datum *) palloc0(ndimensions * sizeof(Datum));
+
+	/* carry over the current NULL-only flags and boundaries */
+	memcpy(copy->nullsonly, bucket->nullsonly, ndimensions * sizeof(bool));
+
+	memcpy(copy->min_inclusive, bucket->min_inclusive, ndimensions * sizeof(bool));
+	memcpy(copy->min, bucket->min, ndimensions * sizeof(Datum));
+
+	memcpy(copy->max_inclusive, bucket->max_inclusive, ndimensions * sizeof(bool));
+	memcpy(copy->max, bucket->max, ndimensions * sizeof(Datum));
+
+	return copy;
+}
+
+/*
+ * Evaluate the number of distinct value combinations in the bucket, by
+ * extracting the attribute values into an array of SortItems, sorting it
+ * with multi_sort_compare() and counting the group boundaries.
+ *
+ * TODO This might evaluate and store the distinct counts for all
+ *      possible attribute combinations. The assumption is this might be
+ *      useful for estimating things like GROUP BY cardinalities (e.g.
+ *      in cases when some buckets contain a lot of low-frequency
+ *      combinations, and other buckets contain few high-frequency ones).
+ *
+ *      But it's unclear whether it's worth the price. Computing this
+ *      is actually quite cheap, because it may be evaluated at the very
+ *      end, when the buckets are rather small (so sorting it in 2^N ways
+ *      is not a big deal). Assuming the partitioning algorithm does not
+ *      use these values to do the decisions, of course (the current
+ *      algorithm does not).
+ *
+ *      The overhead with storing, fetching and parsing the data is more
+ *      concerning - adding 2^N values per bucket (even if it's just
+ *      a 1B or 2B value) would significantly bloat the histogram, and
+ *      thus the impact on optimizer. Which is not really desirable.
+ *
+ * TODO This only updates the ndistinct for the sample (or bucket), but
+ *      we eventually need an estimate of the total number of distinct
+ *      values in the dataset. It's possible to either use the current
+ *      1D approach (i.e., if it's more than 10% of the sample, assume
+ *      it's proportional to the number of rows). Or it's possible to
+ *      implement the estimator suggested in the article, supposedly
+ *      giving 'optimal' estimates (w.r.t. probability of error).
+ */
+static void
+update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, VacAttrStats ** stats)
+{
+	int			i, j;
+	int			numrows = bucket->numrows;
+	int			numattrs = attrs->dim1;
+
+	MultiSortSupport mss = multi_sort_init(numattrs);
+
+	/*
+	 * One SortItem per sample row, with the values/isnull slices carved
+	 * out of two shared arrays (instead of a palloc per row).
+	 */
+	SortItem   *items = (SortItem *) palloc0(numrows * sizeof(SortItem));
+	Datum	   *values = (Datum *) palloc0(numrows * sizeof(Datum) * numattrs);
+	bool	   *isnull = (bool *) palloc0(numrows * sizeof(bool) * numattrs);
+
+	for (i = 0; i < numrows; i++)
+	{
+		items[i].values = &values[i * numattrs];
+		items[i].isnull = &isnull[i * numattrs];
+	}
+
+	/* sort by all the dimensions, in the order they appear in 'attrs' */
+	for (i = 0; i < numattrs; i++)
+		multi_sort_add_dimension(mss, i, i, stats);
+
+	/* extract the attribute values from the sample rows */
+	for (i = 0; i < numrows; i++)
+	{
+		for (j = 0; j < numattrs; j++)
+			items[i].values[j] = heap_getattr(bucket->rows[i],
+											  attrs->values[j],
+											  stats[j]->tupDesc,
+											  &items[i].isnull[j]);
+	}
+
+	qsort_arg((void *) items, numrows, sizeof(SortItem),
+			  multi_sort_compare, mss);
+
+	/* walk the sorted array and count the distinct combinations */
+	bucket->ndistinct = 1;
+	for (i = 1; i < numrows; i++)
+	{
+		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+			bucket->ndistinct += 1;
+	}
+
+	pfree(items);
+	pfree(values);
+	pfree(isnull);
+}
+
+/*
+ * Count distinct values in a single bucket dimension, and (when
+ * update_boundaries is true) also update the dimension's min/max
+ * boundaries to the smallest bounding box.
+ *
+ * NULL values are ignored while counting; if the dimension turns out to
+ * contain nothing but NULLs, it's marked as NULL-only and the distinct
+ * count is set to 1 (the NULL itself).
+ */
+static void
+update_dimension_ndistinct(MVBucket bucket, int dimension, int2vector *attrs,
+						   VacAttrStats ** stats, bool update_boundaries)
+{
+	int j;
+	int nvalues = 0;
+	bool isNull;
+	Datum * values;
+	SortSupportData ssup;
+
+	StdAnalyzeData * mystats = (StdAnalyzeData *) stats[dimension]->extra_data;
+
+	/*
+	 * If we already know this is a NULL-only dimension, there's a single
+	 * "distinct" value (the NULL) and nothing to collect or sort - bail
+	 * out right away instead of scanning all the rows just to reach the
+	 * same conclusion through the (nvalues == 0) branch below.
+	 */
+	if (bucket->nullsonly[dimension])
+	{
+		bucket->ndistincts[dimension] = 1;
+		return;
+	}
+
+	values = (Datum*)palloc0(bucket->numrows * sizeof(Datum));
+
+	memset(&ssup, 0, sizeof(ssup));
+	ssup.ssup_cxt = CurrentMemoryContext;
+
+	/* We always use the default collation for statistics */
+	ssup.ssup_collation = DEFAULT_COLLATION_OID;
+	ssup.ssup_nulls_first = false;
+
+	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
+
+	/* collect the non-NULL values from the sample rows */
+	for (j = 0; j < bucket->numrows; j++)
+	{
+		values[nvalues] = heap_getattr(bucket->rows[j], attrs->values[dimension],
+									   stats[dimension]->tupDesc, &isNull);
+
+		/* ignore NULL values */
+		if (! isNull)
+			nvalues++;
+	}
+
+	/* there's always at least 1 distinct value (may be NULL) */
+	bucket->ndistincts[dimension] = 1;
+
+	/* if there are only NULL values in the column, mark it so and continue
+	 * with the next one */
+	if (nvalues == 0)
+	{
+		pfree(values);
+		bucket->nullsonly[dimension] = true;
+		return;
+	}
+
+	/* sort the array (pass-by-value Datum values) */
+	qsort_arg((void *) values, nvalues, sizeof(Datum),
+			  compare_scalars_simple, (void *) &ssup);
+
+	/*
+	 * Update min/max boundaries to the smallest bounding box. Generally, this
+	 * needs to be done only when constructing the initial bucket.
+	 */
+	if (update_boundaries)
+	{
+		/* store the min/max values */
+		bucket->min[dimension] = values[0];
+		bucket->min_inclusive[dimension] = true;
+
+		bucket->max[dimension] = values[nvalues-1];
+		bucket->max_inclusive[dimension] = true;
+	}
+
+	/*
+	 * Walk through the array and count distinct values by comparing
+	 * succeeding values.
+	 *
+	 * FIXME This only works for pass-by-value types (i.e. not VARCHARs
+	 *       etc.). Although thanks to the deduplication it might work
+	 *       even for those types (equal values will get the same item
+	 *       in the deduplicated array).
+	 */
+	for (j = 1; j < nvalues; j++)
+	{
+		if (values[j] != values[j-1])
+			bucket->ndistincts[dimension] += 1;
+	}
+
+	pfree(values);
+}
+
+/*
+ * A properly built histogram must not contain buckets mixing NULL and
+ * non-NULL values in a single dimension. Each dimension may either be
+ * marked as 'nulls only', and thus containing only NULL values, or
+ * it must not contain any NULL values.
+ *
+ * Therefore, if the sample contains NULL values in any of the columns,
+ * it's necessary to build those NULL-buckets. This is done in an
+ * iterative way using this algorithm, operating on a single bucket:
+ *
+ *     (1) Check that all dimensions are well-formed (not mixing NULL
+ *         and non-NULL values).
+ *
+ *     (2) If all dimensions are well-formed, terminate.
+ *
+ *     (3) If the dimension contains only NULL values, but is not
+ *         marked as NULL-only, mark it as NULL-only and run the
+ *         algorithm again (on this bucket).
+ *
+ *     (4) If the dimension mixes NULL and non-NULL values, split the
+ *         bucket into two parts - one with NULL values, one with
+ *         non-NULL values (replacing the current one). Then run
+ *         the algorithm on both buckets.
+ *
+ * This is executed in a recursive manner, but the number of executions
+ * should be quite low - limited by the number of NULL-buckets. Also,
+ * in each branch the number of nested calls is limited by the number
+ * of dimensions (attributes) of the histogram.
+ *
+ * At the end, there should be buckets with no mixed dimensions. The
+ * number of buckets produced by this algorithm is rather limited - with
+ * N dimensions, there may be only 2^N such buckets (each dimension may
+ * be either NULL or non-NULL). So with 8 dimensions (current value of
+ * MVSTATS_MAX_DIMENSIONS) there may be only 256 such buckets.
+ *
+ * After this, a 'regular' bucket-split algorithm shall run, further
+ * optimizing the histogram.
+ *
+ * NOTE(review): this appends to histogram->buckets without any visible
+ * capacity check - presumably the caller allocates room for the worst
+ * case (2^N NULL-buckets per initial bucket); confirm at the call site.
+ */
+static void
+create_null_buckets(MVHistogram histogram, int bucket_idx,
+					int2vector *attrs, VacAttrStats ** stats)
+{
+	int			i, j;
+	int			null_dim = -1;
+	int			null_count = 0;
+	bool		null_found = false;
+	MVBucket	bucket, null_bucket;
+	int			null_idx, curr_idx;
+
+	/* remember original values from the bucket */
+	int			numrows;
+	HeapTuple  *oldrows = NULL;
+
+	Assert(bucket_idx < histogram->nbuckets);
+	Assert(histogram->ndimensions == attrs->dim1);
+
+	bucket = histogram->buckets[bucket_idx];
+
+	/*
+	 * NOTE(review): numrows/oldrows are assigned again just before the
+	 * actual split below, so these two assignments are redundant (but
+	 * harmless).
+	 */
+	numrows = bucket->numrows;
+	oldrows = bucket->rows;
+
+	/*
+	 * Walk through all rows / dimensions, and stop once we find NULL
+	 * in a dimension not yet marked as NULL-only.
+	 */
+	for (i = 0; i < bucket->numrows; i++)
+	{
+		for (j = 0; j < histogram->ndimensions; j++)
+		{
+			/* Is this a NULL-only dimension? If yes, skip. */
+			if (bucket->nullsonly[j])
+				continue;
+
+			/* found a NULL in that dimension? */
+			if (heap_attisnull(bucket->rows[i], attrs->values[j]))
+			{
+				null_found = true;
+				null_dim = j;
+				break;
+			}
+		}
+
+		/* terminate if we found attribute with NULL values */
+		if (null_found)
+			break;
+	}
+
+	/* no regular dimension contains NULL values => we're done */
+	if (! null_found)
+		return;
+
+	/* walk through the rows again, count NULL values in 'null_dim' */
+	for (i = 0; i < bucket->numrows; i++)
+	{
+		if (heap_attisnull(bucket->rows[i], attrs->values[null_dim]))
+			null_count += 1;
+	}
+
+	Assert(null_count <= bucket->numrows);
+
+	/*
+	 * If (null_count == numrows) the dimension already is NULL-only,
+	 * but is not yet marked like that. It's enough to mark it and
+	 * repeat the process recursively (until we run out of dimensions).
+	 */
+	if (null_count == bucket->numrows)
+	{
+		bucket->nullsonly[null_dim] = true;
+		create_null_buckets(histogram, bucket_idx, attrs, stats);
+		return;
+	}
+
+	/*
+	 * We have to split the bucket into two - one with NULL values in
+	 * the dimension, one with non-NULL values. We don't need to sort
+	 * the data or anything, but otherwise it's similar to what's done
+	 * in partition_bucket().
+	 */
+
+	/* create bucket with NULL-only dimension 'dim' */
+	null_bucket = copy_mv_bucket(bucket, histogram->ndimensions);
+
+	/* remember the current array info */
+	oldrows = bucket->rows;
+	numrows = bucket->numrows;
+
+	/* we'll keep non-NULL values in the current bucket */
+	bucket->numrows = (numrows - null_count);
+	bucket->rows
+		= (HeapTuple*)palloc0(bucket->numrows * sizeof(HeapTuple));
+
+	/* and the NULL values will go to the new one */
+	null_bucket->numrows = null_count;
+	null_bucket->rows
+		= (HeapTuple*)palloc0(null_bucket->numrows * sizeof(HeapTuple));
+
+	/* mark the dimension as NULL-only (in the new bucket) */
+	null_bucket->nullsonly[null_dim] = true;
+
+	/* walk through the sample rows and distribute them accordingly */
+	null_idx = 0;
+	curr_idx = 0;
+	for (i = 0; i < numrows; i++)
+	{
+		if (heap_attisnull(oldrows[i], attrs->values[null_dim]))
+			/* NULL => copy to the new bucket */
+			memcpy(&null_bucket->rows[null_idx++], &oldrows[i],
+					sizeof(HeapTuple));
+		else
+			memcpy(&bucket->rows[curr_idx++], &oldrows[i],
+					sizeof(HeapTuple));
+	}
+
+	/* update ndistinct values for the buckets (total and per dimension) */
+	update_bucket_ndistinct(bucket, attrs, stats);
+	update_bucket_ndistinct(null_bucket, attrs, stats);
+
+	/*
+	 * TODO We don't need to do this for the dimension we used for split,
+	 *      because we know how many distinct values went to each
+	 *      bucket (NULL is not a value, so 0, and the other bucket got
+	 *      all the ndistinct values).
+	 */
+	for (i = 0; i < histogram->ndimensions; i++)
+	{
+		update_dimension_ndistinct(bucket, i, attrs, stats, false);
+		update_dimension_ndistinct(null_bucket, i, attrs, stats, false);
+	}
+
+	pfree(oldrows);
+
+	/* add the NULL bucket to the histogram */
+	histogram->buckets[histogram->nbuckets++] = null_bucket;
+
+	/*
+	 * And now run the function recursively on both buckets (the new
+	 * one first, because the call may change number of buckets, and
+	 * it's used as an index).
+	 */
+	create_null_buckets(histogram, (histogram->nbuckets-1), attrs, stats);
+	create_null_buckets(histogram, bucket_idx, attrs, stats);
+
+}
+
+/*
+ * Comparator wrapper for bsearch(). bsearch() has no 'context' argument,
+ * so the SortSupport state has to reach compare_scalars_simple() through
+ * the ssup_private global variable (ugly, but there's no other way).
+ */
+static int
+bsearch_comparator(const void * a, const void * b)
+{
+	/* the caller must have set up the global before searching */
+	Assert(ssup_private != NULL);
+
+	return compare_scalars_simple(a, b, (void *) ssup_private);
+}
diff --git a/src/backend/utils/mvstats/mcv.c b/src/backend/utils/mvstats/mcv.c
index 2b3d171..b0cea61 100644
--- a/src/backend/utils/mvstats/mcv.c
+++ b/src/backend/utils/mvstats/mcv.c
@@ -961,6 +961,7 @@ MCVList deserialize_mv_mcvlist(bytea * data)
 
 	for (i = 0; i < nitems; i++)
 	{
+		/* FIXME allocate as a single chunk (minimize palloc overhead) */
 		MCVItem item = (MCVItem)palloc0(sizeof(MCVItemData));
 
 		item->values = (Datum*)palloc0(sizeof(Datum)*ndims);
diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h
index f88e200..08424bd 100644
--- a/src/include/catalog/pg_mv_statistic.h
+++ b/src/include/catalog/pg_mv_statistic.h
@@ -36,13 +36,16 @@ CATALOG(pg_mv_statistic,3281)
 	/* statistics requested to build */
 	bool		deps_enabled;		/* analyze dependencies? */
 	bool		mcv_enabled;		/* build MCV list? */
+	bool		hist_enabled;		/* build histogram? */
 
-	/* MCV size */
+	/* histogram / MCV size */
 	int32		mcv_max_items;		/* max MCV items */
+	int32		hist_max_buckets;	/* max histogram buckets */
 
 	/* statistics that are available (if requested) */
 	bool		deps_built;			/* dependencies were built */
 	bool		mcv_built;			/* MCV list was built */
+	bool		hist_built;			/* histogram was built */
 
 	/* variable-length fields start here, but we allow direct access to stakeys */
 	int2vector	stakeys;			/* array of column keys */
@@ -50,6 +53,7 @@ CATALOG(pg_mv_statistic,3281)
 #ifdef CATALOG_VARLEN
 	bytea		stadeps;			/* dependencies (serialized) */
 	bytea		stamcv;				/* MCV list (serialized) */
+	bytea		stahist;			/* MV histogram (serialized) */
 #endif
 
 } FormData_pg_mv_statistic;
@@ -65,15 +69,19 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic;
  *		compiler constants for pg_attrdef
  * ----------------
  */
-#define Natts_pg_mv_statistic					9
+#define Natts_pg_mv_statistic					13
 #define Anum_pg_mv_statistic_starelid			1
 #define Anum_pg_mv_statistic_deps_enabled		2
 #define Anum_pg_mv_statistic_mcv_enabled		3
-#define Anum_pg_mv_statistic_mcv_max_items		4
-#define Anum_pg_mv_statistic_deps_built			5
-#define Anum_pg_mv_statistic_mcv_built			6
-#define Anum_pg_mv_statistic_stakeys			7
-#define Anum_pg_mv_statistic_stadeps			8
-#define Anum_pg_mv_statistic_stamcv				9
+#define Anum_pg_mv_statistic_hist_enabled		4
+#define Anum_pg_mv_statistic_mcv_max_items		5
+#define Anum_pg_mv_statistic_hist_max_buckets	6
+#define Anum_pg_mv_statistic_deps_built			7
+#define Anum_pg_mv_statistic_mcv_built			8
+#define Anum_pg_mv_statistic_hist_built			9
+#define Anum_pg_mv_statistic_stakeys			10
+#define Anum_pg_mv_statistic_stadeps			11
+#define Anum_pg_mv_statistic_stamcv				12
+#define Anum_pg_mv_statistic_stahist			13
 
 #endif   /* PG_MV_STATISTIC_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index b4e7b4f..448e76a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2689,6 +2689,8 @@ DATA(insert OID = 3285 (  pg_mv_stats_dependencies_show     PGNSP PGUID 12 1 0 0
 DESCR("multivariate stats: functional dependencies show");
 DATA(insert OID = 3283 (  pg_mv_stats_mcvlist_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_mcvlist_info _null_ _null_ _null_ ));
 DESCR("multi-variate statistics: MCV list info");
+DATA(insert OID = 3282 (  pg_mv_stats_histogram_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_histogram_info _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: histogram info");
 
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index e11aefc..028a634 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -26,10 +26,12 @@ typedef struct MVStatsData {
 	/* statistics requested in ALTER TABLE ... ADD STATISTICS */
 	bool		deps_enabled;	/* analyze functional dependencies */
 	bool		mcv_enabled;	/* analyze MCV lists */
+	bool		hist_enabled;	/* analyze histogram */
 
 	/* available statistics (computed by ANALYZE) */
 	bool		deps_built;	/* functional dependencies available */
 	bool		mcv_built;	/* MCV list is already available */
+	bool		hist_built;	/* histogram is already available */
 } MVStatsData;
 
 typedef struct MVStatsData *MVStats;
@@ -109,6 +111,91 @@ typedef MCVListData *MCVList;
 #define MVSTAT_MCVLIST_MAX_ITEMS	8192	/* max items in MCV list */
 
 /*
+ * Multivariate histograms
+ */
+typedef struct MVBucketData {
+
+	/* Frequencies of this bucket. */
+	float	ntuples;	/* frequency of tuples */
+	float	ndistinct;	/* frequency of distinct values */
+
+	/*
+	 * Number of distinct values in each dimension. This is used when
+	 * building the histogram (and is not serialized/deserialized), but
+	 * it could be useful for estimating ndistinct for combinations of
+	 * columns.
+	 *
+	 * It would mean tracking 2^N values for each bucket, and even if
+	 * those values might be stored in 1B it's still a lot of space
+	 * (considering the expected number of buckets).
+	 *
+	 * TODO Consider tracking ndistincts for all attribute combinations.
+	 */
+	uint32 *ndistincts;
+
+	/*
+	 * Information about dimensions being NULL-only. Not yet used.
+	 */
+	bool   *nullsonly;
+
+	/* lower boundaries - values and information about the inequalities */
+	Datum  *min;
+	bool   *min_inclusive;
+
+	/* upper boundaries - values and information about the inequalities */
+	Datum  *max;
+	bool   *max_inclusive;
+
+	/*
+	 * Sample tuples falling into this bucket, index of the dimension
+	 * the bucket was split by in the last step.
+	 *
+	 * XXX These fields are needed only while building the histogram,
+	 *     and are not serialized at all.
+	 */
+	HeapTuple  *rows;
+	uint32		numrows;
+	int			last_split_dimension;
+
+} MVBucketData;
+
+typedef MVBucketData	*MVBucket;
+
+
+typedef struct MVHistogramData {
+
+	uint32		magic;			/* magic constant marker */
+	uint32		type;			/* type of histogram (BASIC) */
+	uint32 		nbuckets;		/* number of buckets (buckets array) */
+	uint32		ndimensions;	/* number of dimensions */
+
+	MVBucket   *buckets;		/* array of buckets */
+
+} MVHistogramData;
+
+typedef MVHistogramData *MVHistogram;
+
+/* used to flag stats serialized to bytea */
+#define MVSTAT_HIST_MAGIC		0x7F8C5670	/* marks serialized bytea */
+#define MVSTAT_HIST_TYPE_BASIC	1			/* basic histogram type */
+
+/*
+ * Limits used for max_buckets option, i.e. we're always guaranteed
+ * to have space for at least MVSTAT_HIST_MIN_BUCKETS, and we cannot
+ * have more than MVSTAT_HIST_MAX_BUCKETS buckets.
+ *
+ * This is just a boundary for the 'max' threshold - the actual
+ * histogram may use fewer buckets than MVSTAT_HIST_MAX_BUCKETS.
+ *
+ * TODO The MVSTAT_HIST_MIN_BUCKETS should be related to the number of
+ *      attributes (MVSTATS_MAX_DIMENSIONS) because of NULL-buckets.
+ *      There should be at least 2^N buckets, otherwise we may be unable
+ *      to build the NULL buckets.
+ */
+#define MVSTAT_HIST_MIN_BUCKETS	128			/* min number of buckets */
+#define MVSTAT_HIST_MAX_BUCKETS	16384		/* max number of buckets */
+
+/*
  * TODO Maybe fetching the histogram/MCV list separately is inefficient?
  *      Consider adding a single `fetch_stats` method, fetching all
  *      stats specified using flags (or something like that).
@@ -118,14 +205,18 @@ bytea * fetch_mv_rules(Oid mvoid);
 
 bytea * fetch_mv_dependencies(Oid mvoid);
 bytea * fetch_mv_mcvlist(Oid mvoid);
+bytea * fetch_mv_histogram(Oid mvoid);
 
 bytea * serialize_mv_dependencies(MVDependencies dependencies);
 bytea * serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs,
 							 VacAttrStats **stats);
+bytea * serialize_mv_histogram(MVHistogram histogram, int2vector *attrs,
+					  VacAttrStats **stats);
 
 /* deserialization of stats (serialization is private to analyze) */
 MVDependencies	deserialize_mv_dependencies(bytea * data);
 MCVList			deserialize_mv_mcvlist(bytea * data);
+MVHistogram		deserialize_mv_histogram(bytea * data);
 
 /*
  * Returns index of the attribute number within the vector (i.e. a
@@ -137,6 +228,7 @@ int mv_get_index(AttrNumber varattno, int2vector * stakeys);
 extern Datum pg_mv_stats_dependencies_info(PG_FUNCTION_ARGS);
 extern Datum pg_mv_stats_dependencies_show(PG_FUNCTION_ARGS);
 extern Datum pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS);
+extern Datum pg_mv_stats_histogram_info(PG_FUNCTION_ARGS);
 
 MVDependencies
 build_mv_dependencies(int numrows, HeapTuple *rows, int2vector *attrs,
@@ -146,10 +238,15 @@ MCVList
 build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
 				 VacAttrStats **stats, int *numrows_filtered);
 
+MVHistogram
+build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs,
+				   VacAttrStats **stats, int numrows_total);
+
 void build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 					int natts, VacAttrStats **vacattrstats);
 
-void update_mv_stats(Oid relid, MVDependencies dependencies, MCVList mcvlist,
+void update_mv_stats(Oid relid, MVDependencies dependencies,
+					 MCVList mcvlist, MVHistogram histogram,
 					 int2vector *attrs, VacAttrStats **stats);
 
 #endif
diff --git a/src/test/regress/expected/mv_histogram.out b/src/test/regress/expected/mv_histogram.out
new file mode 100644
index 0000000..a0cf37f
--- /dev/null
+++ b/src/test/regress/expected/mv_histogram.out
@@ -0,0 +1,210 @@
+-- data type passed by value
+CREATE TABLE mv_histogram (
+    a INT,
+    b INT,
+    c INT
+);
+-- unknown column
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (unknown_column);
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a);
+ERROR:  multivariate stats require 2 or more columns
+-- single column, duplicated
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, a);
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, a, b);
+ERROR:  duplicate column name in statistics definition
+-- unknown option
+ALTER TABLE mv_histogram ADD STATISTICS (unknown_option) ON (a, b, c);
+ERROR:  unrecognized STATISTICS option "unknown_option"
+-- missing histogram statistics
+ALTER TABLE mv_histogram ADD STATISTICS (dependencies, max_buckets 200) ON (a, b, c);
+ERROR:  option 'histogram' is required by other option(s)
+-- invalid max_buckets value / too low
+ALTER TABLE mv_histogram ADD STATISTICS (mcv, max_buckets 10) ON (a, b, c);
+ERROR:  minimum number of buckets is 128
+-- invalid max_buckets value / too high
+ALTER TABLE mv_histogram ADD STATISTICS (mcv, max_buckets 100000) ON (a, b, c);
+ERROR:  maximum number of buckets is 16384
+-- correct command
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c);
+-- random data (no functional dependencies)
+INSERT INTO mv_histogram
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=10000
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- a => b, a => c, b => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=1001
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- a => b, a => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=1001
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mv_histogram 
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX hist_idx ON mv_histogram (a, b);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built 
+--------------+------------
+ t            | t
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a = 10 AND b = 5;
+                 QUERY PLAN                 
+--------------------------------------------
+ Bitmap Heap Scan on mv_histogram
+   Recheck Cond: ((a = 10) AND (b = 5))
+   ->  Bitmap Index Scan on hist_idx
+         Index Cond: ((a = 10) AND (b = 5))
+(4 rows)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
+-- varlena type (text)
+CREATE TABLE mv_histogram (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c);
+-- random data (no functional dependencies)
+INSERT INTO mv_histogram
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=10000
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- a => b, a => c, b => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=3492
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- a => b, a => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built | pg_mv_stats_histogram_info 
+--------------+------------+----------------------------
+ t            | t          | nbuckets=3433
+(1 row)
+
+TRUNCATE mv_histogram;  
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mv_histogram 
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX hist_idx ON mv_histogram (a, b);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built 
+--------------+------------
+ t            | t
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a = '10' AND b = '5';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Bitmap Heap Scan on mv_histogram
+   Recheck Cond: ((a = '10'::text) AND (b = '5'::text))
+   ->  Bitmap Index Scan on hist_idx
+         Index Cond: ((a = '10'::text) AND (b = '5'::text))
+(4 rows)
+
+TRUNCATE mv_histogram;
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mv_histogram 
+     SELECT
+       (CASE WHEN i/10000 = 0 THEN NULL ELSE i/10000 END),
+       (CASE WHEN i/20000 = 0 THEN NULL ELSE i/20000 END),
+       (CASE WHEN i/40000 = 0 THEN NULL ELSE i/40000 END)
+     FROM generate_series(1,1000000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built 
+--------------+------------
+ t            | t
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL;
+                    QUERY PLAN                     
+---------------------------------------------------
+ Bitmap Heap Scan on mv_histogram
+   Recheck Cond: ((a IS NULL) AND (b IS NULL))
+   ->  Bitmap Index Scan on hist_idx
+         Index Cond: ((a IS NULL) AND (b IS NULL))
+(4 rows)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
+-- NULL values (mix of int and text columns)
+CREATE TABLE mv_histogram (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c, d);
+INSERT INTO mv_histogram
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+ hist_enabled | hist_built 
+--------------+------------
+ t            | t
+(1 row)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 80375b8..07896b4 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1359,7 +1359,9 @@ pg_mv_stats| SELECT n.nspname AS schemaname,
     length(s.stadeps) AS depsbytes,
     pg_mv_stats_dependencies_info(s.stadeps) AS depsinfo,
     length(s.stamcv) AS mcvbytes,
-    pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo
+    pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo,
+    length(s.stahist) AS histbytes,
+    pg_mv_stats_histogram_info(s.stahist) AS histinfo
    FROM ((pg_mv_statistic s
      JOIN pg_class c ON ((c.oid = s.starelid)))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 78c9b04..d9864b7 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -111,4 +111,4 @@ test: event_trigger
 test: stats
 
 # run tests of multivariate stats
-test: mv_dependencies mv_mcv
+test: mv_dependencies mv_mcv mv_histogram
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 3f9884f..d901a78 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -154,3 +154,4 @@ test: event_trigger
 test: stats
 test: mv_dependencies
 test: mv_mcv
+test: mv_histogram
diff --git a/src/test/regress/sql/mv_histogram.sql b/src/test/regress/sql/mv_histogram.sql
new file mode 100644
index 0000000..a693e35
--- /dev/null
+++ b/src/test/regress/sql/mv_histogram.sql
@@ -0,0 +1,179 @@
+-- data type passed by value
+CREATE TABLE mv_histogram (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- unknown column
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (unknown_column);
+
+-- single column
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a);
+
+-- single column, duplicated
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, a);
+
+-- two columns, one duplicated
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, a, b);
+
+-- unknown option
+ALTER TABLE mv_histogram ADD STATISTICS (unknown_option) ON (a, b, c);
+
+-- missing histogram statistics
+ALTER TABLE mv_histogram ADD STATISTICS (dependencies, max_buckets 200) ON (a, b, c);
+
+-- invalid max_buckets value / too low
+ALTER TABLE mv_histogram ADD STATISTICS (mcv, max_buckets 10) ON (a, b, c);
+
+-- invalid max_buckets value / too high
+ALTER TABLE mv_histogram ADD STATISTICS (mcv, max_buckets 100000) ON (a, b, c);
+
+-- correct command
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c);
+
+-- random data (no functional dependencies)
+INSERT INTO mv_histogram
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- a => b, a => c, b => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- a => b, a => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mv_histogram 
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX hist_idx ON mv_histogram (a, b);
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a = 10 AND b = 5;
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
+
+-- varlena type (text)
+CREATE TABLE mv_histogram (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c);
+
+-- random data (no functional dependencies)
+INSERT INTO mv_histogram
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- a => b, a => c, b => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- a => b, a => c
+INSERT INTO mv_histogram
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built, pg_mv_stats_histogram_info(stahist)
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+TRUNCATE mv_histogram;  
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mv_histogram 
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX hist_idx ON mv_histogram (a, b);
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a = '10' AND b = '5';
+
+TRUNCATE mv_histogram;
+
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mv_histogram 
+     SELECT
+       (CASE WHEN i/10000 = 0 THEN NULL ELSE i/10000 END),
+       (CASE WHEN i/20000 = 0 THEN NULL ELSE i/20000 END),
+       (CASE WHEN i/40000 = 0 THEN NULL ELSE i/40000 END)
+     FROM generate_series(1,1000000) s(i);
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL;
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
+
+-- NULL values (mix of int and text columns)
+CREATE TABLE mv_histogram (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+
+ALTER TABLE mv_histogram ADD STATISTICS (histogram) ON (a, b, c, d);
+
+INSERT INTO mv_histogram
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+
+ANALYZE mv_histogram;
+
+SELECT hist_enabled, hist_built
+  FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass;
+DROP TABLE mv_histogram;
-- 
2.0.5

