static void bt_tuple_present_callback(Relation index, HeapTuple htup,
Datum *values, bool *isnull,
bool tupleIsAlive, void *checkstate);
+static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
+ IndexTuple itup);
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
OffsetNumber offset);
static inline bool invariant_leq_offset(BtreeCheckState *state,
/* Fingerprint leaf page tuples (those that point to the heap) */
if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
- bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
+ {
+ IndexTuple norm;
+
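+ /* Fingerprint the normalized form, matching what the heap-side probe uses */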
+ norm = bt_normalize_tuple(state, itup);
+ bloom_add_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm));
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+ }
/*
* * High key check *
bool *isnull, bool tupleIsAlive, void *checkstate)
{
BtreeCheckState *state = (BtreeCheckState *) checkstate;
- IndexTuple itup;
+ IndexTuple itup, norm;
Assert(state->heapallindexed);
- /*
- * Generate an index tuple for fingerprinting.
- *
- * Index tuple formation is assumed to be deterministic, and IndexTuples
- * are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
- * that's ItemId metadata, which was not fingerprinted. (There will often
- * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
- * but we only try to detect the absence of needed tuples, so that's
- * okay.)
- *
- * Note that we rely on deterministic index_form_tuple() TOAST
- * compression. If index_form_tuple() was ever enhanced to compress datums
- * out-of-line, or otherwise varied when or how compression was applied,
- * our assumption would break, leading to false positive reports of
- * corruption. It's also possible that non-pivot tuples could in the
- * future have alternative equivalent representations (e.g. by using the
- * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
- * values as part of fingerprinting.
- */
+ /* Generate a normalized index tuple for fingerprinting */
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
itup->t_tid = htup->t_self;
+ norm = bt_normalize_tuple(state, itup);
/* Probe Bloom filter -- tuple should be present */
- if (bloom_lacks_element(state->filter, (unsigned char *) itup,
- IndexTupleSize(itup)))
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
state->heaptuplespresent++;
pfree(itup);
+ /* Cannot leak memory here */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * Normalize an index tuple for fingerprinting.
+ *
+ * In general, index tuple formation is assumed to be deterministic by
+ * heapallindexed verification, and IndexTuples are assumed immutable. While
+ * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
+ * not fingerprinted. Normalization is required to compensate for corner
+ * cases where the determinism assumption doesn't quite work.
+ *
+ * There is currently one such case: index_form_tuple() does not try to hide
+ * the source TOAST state of input datums. The executor applies TOAST
+ * compression for heap tuples based on different criteria than the compression
+ * applied within btinsert()'s call to index_form_tuple(): it sometimes
+ * compresses more aggressively, resulting in compressed heap tuple datums but
+ * uncompressed corresponding index tuple datums. A subsequent heapallindexed
+ * verification will get a logically equivalent though bitwise unequal tuple
+ * from index_form_tuple(). False positive heapallindexed corruption reports
+ * could occur without normalizing away the inconsistency.
+ *
+ * Returned tuple is often caller's own original tuple. Otherwise, it is a
+ * new representation of caller's original index tuple, palloc()'d in caller's
+ * memory context.
+ *
+ * Note: This routine is not concerned with distinctions about the
+ * representation of tuples beyond those that might break heapallindexed
+ * verification. In particular, it won't try to normalize opclass-equal
+ * datums with potentially distinct representations (e.g., btree/numeric_ops
+ * index datums will not get their display scale normalized-away here).
+ * Normalization may need to be expanded to handle more cases in the future,
+ * though. For example, it's possible that non-pivot tuples could one day
+ * have alternative logically equivalent representations due to using the
+ * INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
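+ *
+ * Normalization here affects only what heapallindexed verification
+ * fingerprints and probes; tuples actually stored in the index are never
+ * modified.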
+ */
+static IndexTuple
+bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool toast_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ toast_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
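+ /* Only non-null, pass-by-reference varlena datums can carry TOAST compression */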
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(state->rel))));
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
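+ /* Decompress so the reformed tuple is built from the uncompressed datum */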
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ toast_free[i] = true;
+ }
+ }
+
+ /* Easier case: Tuple has varlena datums, none of which are compressed */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate creating
+ * a normalized version of the tuple from the uncompressed (normalized)
+ * input datums. This is rather naive, but shouldn't be necessary too often.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
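+ /* Preserve the original heap TID, which the fingerprint also covers */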
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (toast_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
}
/*