Fix longstanding problems in VACUUM caused by untimely interruptions
authorAlvaro Herrera <[email protected]>
Tue, 10 Nov 2009 18:01:46 +0000 (18:01 +0000)
committerAlvaro Herrera <[email protected]>
Tue, 10 Nov 2009 18:01:46 +0000 (18:01 +0000)
In VACUUM FULL, an interrupt after the initial transaction has been recorded
as committed can cause postmaster to restart with the following error message:
PANIC: cannot abort transaction NNNN, it was already committed
This problem has been reported many times.

In lazy VACUUM, an interrupt after the table has been truncated by
lazy_truncate_heap causes other backends' relcache to still point to the
removed pages; this can cause future INSERT and UPDATE queries to error out
with the following error message:
could not read block XX of relation 1663/NNN/MMMM: read only 0 of 8192 bytes
The window to this race condition is extremely narrow, but it has been seen in
the wild involving a cancelled autovacuum process.

The solution for both problems is to inhibit interrupts in both operations
until after the respective transactions have been committed.  It's not a
complete solution, because the transaction could theoretically be aborted by
some other error, but at least fixes the most common causes of both problems.

src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/include/commands/vacuum.h

index 84a654799c54f11db1f889c01c6a422882579983..afb2ad75164d50d6dd240596adfcd42aee74d4e4 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.263.2.5 2008/02/11 19:15:00 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.263.2.6 2009/11/10 18:01:46 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -114,10 +114,10 @@ static void vac_update_dbstats(Oid dbid,
 static void vac_truncate_clog(TransactionId vacuumXID,
                                  TransactionId frozenXID);
 static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind);
-static void full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
+static bool full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
 static void scan_heap(VRelStats *vacrelstats, Relation onerel,
                  VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+static bool repair_frag(VRelStats *vacrelstats, Relation onerel,
                        VacPageList vacuum_pages, VacPageList fraged_pages,
                        int nindexes, Relation *Irel);
 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
@@ -744,6 +744,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
        bool            result;
        AclId           save_userid;
        bool            save_secdefcxt;
+       bool            heldoff;
 
        /* Begin a transaction for vacuuming this relation */
        StartTransactionCommand();
@@ -860,9 +861,9 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
         * Do the actual work --- either FULL or "lazy" vacuum
         */
        if (vacstmt->full)
-               full_vacuum_rel(onerel, vacstmt);
+               heldoff = full_vacuum_rel(onerel, vacstmt);
        else
-               lazy_vacuum_rel(onerel, vacstmt);
+               heldoff = lazy_vacuum_rel(onerel, vacstmt);
 
        result = true;                          /* did the vacuum */
 
@@ -877,6 +878,10 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
         */
        CommitTransactionCommand();
 
+       /* now we can allow interrupts again, if disabled */
+       if (heldoff)
+               RESUME_INTERRUPTS();
+
        /*
         * If the relation has a secondary toast rel, vacuum that too while we
         * still hold the session lock on the master table.  Note however that
@@ -915,8 +920,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
  *
  *             At entry, we have already established a transaction and opened
  *             and locked the relation.
+ *
+ *             The return value indicates whether this function has held off
+ *             interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
  */
-static void
+static bool
 full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
 {
        VacPageListData vacuum_pages;           /* List of pages to vacuum and/or
@@ -927,6 +935,7 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        int                     nindexes,
                                i;
        VRelStats  *vacrelstats;
+       bool            heldoff = false;
 
        vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
                                                  &OldestXmin, &FreezeLimit);
@@ -968,8 +977,8 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        if (fraged_pages.num_pages > 0)
        {
                /* Try to shrink heap */
-               repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
-                                       nindexes, Irel);
+               heldoff = repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
+                                                         nindexes, Irel);
                vac_close_indexes(nindexes, Irel);
        }
        else
@@ -1000,6 +1009,8 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        /* update statistics in pg_class */
        vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages,
                                                vacrelstats->rel_tuples, vacrelstats->hasindex);
+
+       return heldoff;
 }
 
 
@@ -1412,8 +1423,11 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
  *             for them after committing (in hack-manner - without losing locks
  *             and freeing memory!) current transaction. It truncates relation
  *             if some end-blocks are gone away.
+ *
+ *             The return value indicates whether this function has held off
+ *             interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
  */
-static void
+static bool
 repair_frag(VRelStats *vacrelstats, Relation onerel,
                        VacPageList vacuum_pages, VacPageList fraged_pages,
                        int nindexes, Relation *Irel)
@@ -1459,6 +1473,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                dowrite,
                                chain_tuple_moved;
        VacRUsage       ru0;
+       bool            heldoff = false;
 
        vac_init_rusage(&ru0);
 
@@ -2358,7 +2373,13 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                 * a lot of extra code to close and re-open the relation, indexes,
                 * etc.  For now, a quick hack: record status of current
                 * transaction as committed, and continue.
+                *
+                * We prevent cancel interrupts after this point to mitigate the
+                * problem that you can't abort the transaction now; caller is
+                * responsible for re-enabling them after committing the transaction.
                 */
+               HOLD_INTERRUPTS();
+               heldoff = true;
                RecordTransactionCommit();
        }
 
@@ -2591,6 +2612,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
        ExecCloseIndices(resultRelInfo);
 
        FreeExecutorState(estate);
+
+       return heldoff;
 }
 
 /*
index d7daf1ba3c93af85993b96977a4853c06fe7d25f..724f9ec46c41d25b87f02d7cc9d3a500487781d0 100644 (file)
@@ -31,7 +31,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.32.2.3 2009/01/06 14:56:13 heikki Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.32.2.4 2009/11/10 18:01:46 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -120,8 +120,11 @@ static int vac_cmp_page_spaces(const void *left, const void *right);
  *
  *             At entry, we have already established a transaction and opened
  *             and locked the relation.
+ *
+ *             The return value indicates whether this function has held off
+ *             interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
  */
-void
+bool
 lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
 {
        LVRelStats *vacrelstats;
@@ -129,6 +132,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        int                     nindexes;
        bool            hasindex;
        BlockNumber possibly_freeable;
+       bool            heldoff = false;
 
        if (vacstmt->verbose)
                elevel = INFO;
@@ -159,12 +163,22 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
         *
         * Don't even think about it unless we have a shot at releasing a goodly
         * number of pages.  Otherwise, the time taken isn't worth it.
+        *
+        * Note that after we've truncated the heap, it's too late to abort the
+        * transaction; doing so would lose the sinval messages needed to tell the
+        * other backends about the table being shrunk.  We prevent interrupts in
+        * that case; caller is responsible for re-enabling them after
+        * committing the transaction.
         */
        possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
        if (possibly_freeable > 0 &&
                (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
                 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
+       {
+               HOLD_INTERRUPTS();
+               heldoff = true;
                lazy_truncate_heap(onerel, vacrelstats);
+       }
 
        /* Update shared free space map with final free space info */
        lazy_update_fsm(onerel, vacrelstats);
@@ -172,6 +186,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        /* Update statistics in pg_class */
        vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages,
                                                vacrelstats->rel_tuples, hasindex);
+
+       return heldoff;
 }
 
 
index 194adb17c8aad9ab8f6c8f0c970ed7826f31de26..99dd9c0c9b23f7f1f2578f535bb4aabb8efdccb7 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.46 2003/08/04 02:40:13 momjian Exp $
+ * $Id: vacuum.h,v 1.46.4.1 2009/11/10 18:01:46 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -55,7 +55,7 @@ extern void vac_init_rusage(VacRUsage *ru0);
 extern const char *vac_show_rusage(VacRUsage *ru0);
 
 /* in commands/vacuumlazy.c */
-extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
+extern bool lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
 
 /* in commands/analyze.c */
 extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);