1313 *
1414 *
1515 * IDENTIFICATION
16- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.354 2007/08/01 22:45:08 tgl Exp $
16+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.355 2007/08/13 19:08:26 tgl Exp $
1717 *
1818 *-------------------------------------------------------------------------
1919 */
@@ -1163,13 +1163,11 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
11631163vacuum_set_xid_limits (vacstmt -> freeze_min_age ,onerel -> rd_rel -> relisshared ,
11641164& OldestXmin ,& FreezeLimit );
11651165
1166- /*
1167- * VACUUM FULL assumes that all tuple states are well-known prior to
1168- * moving tuples around --- see comment "known dead" in repair_frag(),
1169- * as well as simplifications in tqual.c. So before we start we must
1170- * ensure that any asynchronously-committed transactions with changes
1171- * against this table have been flushed to disk. It's sufficient to do
1172- * this once after we've acquired AccessExclusiveLock.
1166+ /*
1167+ * Flush any previous async-commit transactions. This does not guarantee
1168+ * that we will be able to set hint bits for tuples they inserted, but
1169+ * it improves the probability, especially in simple sequential-commands
1170+ * cases. See scan_heap() and repair_frag() for more about this.
11731171 */
11741172XLogAsyncCommitFlush ();
11751173
@@ -1386,15 +1384,38 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
13861384
13871385switch (HeapTupleSatisfiesVacuum (tuple .t_data ,OldestXmin ,buf ))
13881386{
1389- case HEAPTUPLE_DEAD :
1390- tupgone = true;/* we can delete the tuple */
1391- break ;
13921387case HEAPTUPLE_LIVE :
13931388/* Tuple is good --- but let's do some validity checks */
13941389if (onerel -> rd_rel -> relhasoids &&
13951390!OidIsValid (HeapTupleGetOid (& tuple )))
13961391elog (WARNING ,"relation \"%s\" TID %u/%u: OID is invalid" ,
13971392relname ,blkno ,offnum );
1393+ /*
1394+ * The shrinkage phase of VACUUM FULL requires that all
1395+ * live tuples have XMIN_COMMITTED set --- see comments in
1396+ * repair_frag()'s walk-along-page loop. Use of async
1397+ * commit may prevent HeapTupleSatisfiesVacuum from
1398+ * setting the bit for a recently committed tuple. Rather
1399+ * than trying to handle this corner case, we just give up
1400+ * and don't shrink.
1401+ */
1402+ if (do_shrinking &&
1403+ !(tuple .t_data -> t_infomask & HEAP_XMIN_COMMITTED ))
1404+ {
1405+ ereport (LOG ,
1406+ (errmsg ("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation" ,
1407+ relname ,blkno ,offnum ,
1408+ HeapTupleHeaderGetXmin (tuple .t_data ))));
1409+ do_shrinking = false;
1410+ }
1411+ break ;
1412+ case HEAPTUPLE_DEAD :
1413+ tupgone = true;/* we can delete the tuple */
1414+ /*
1415+ * We need not require XMIN_COMMITTED or XMAX_COMMITTED to
1416+ * be set, since we will remove the tuple without any
1417+ * further examination of its hint bits.
1418+ */
13981419break ;
13991420case HEAPTUPLE_RECENTLY_DEAD :
14001421
@@ -1404,6 +1425,20 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
14041425 */
14051426nkeep += 1 ;
14061427
1428+ /*
1429+ * As with the LIVE case, shrinkage requires XMIN_COMMITTED
1430+ * to be set.
1431+ */
1432+ if (do_shrinking &&
1433+ !(tuple .t_data -> t_infomask & HEAP_XMIN_COMMITTED ))
1434+ {
1435+ ereport (LOG ,
1436+ (errmsg ("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation" ,
1437+ relname ,blkno ,offnum ,
1438+ HeapTupleHeaderGetXmin (tuple .t_data ))));
1439+ do_shrinking = false;
1440+ }
1441+
14071442/*
14081443 * If we do shrinking and this tuple is updated one then
14091444 * remember it to construct updated tuple dependencies.
@@ -1429,26 +1464,34 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
14291464
14301465/*
14311466 * This should not happen, since we hold exclusive lock on
1432- * the relation; shouldn't we raise an error? (Actually,
1467+ * the relation; shouldn't we raise an error? (Actually,
14331468 * it can happen in system catalogs, since we tend to
1434- * release write lock before commit there.)
1469+ * release write lock before commit there.) As above,
1470+ * we can't apply repair_frag() if the tuple state is
1471+ * uncertain.
14351472 */
1436- ereport (NOTICE ,
1437- (errmsg ("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- cannot shrink relation" ,
1438- relname ,blkno ,offnum ,HeapTupleHeaderGetXmin (tuple .t_data ))));
1473+ if (do_shrinking )
1474+ ereport (LOG ,
1475+ (errmsg ("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- cannot shrink relation" ,
1476+ relname ,blkno ,offnum ,
1477+ HeapTupleHeaderGetXmin (tuple .t_data ))));
14391478do_shrinking = false;
14401479break ;
14411480case HEAPTUPLE_DELETE_IN_PROGRESS :
14421481
14431482/*
14441483 * This should not happen, since we hold exclusive lock on
1445- * the relation; shouldn't we raise an error? (Actually,
1484+ * the relation; shouldn't we raise an error? (Actually,
14461485 * it can happen in system catalogs, since we tend to
1447- * release write lock before commit there.)
1486+ * release write lock before commit there.) As above,
1487+ * we can't apply repair_frag() if the tuple state is
1488+ * uncertain.
14481489 */
1449- ereport (NOTICE ,
1450- (errmsg ("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- cannot shrink relation" ,
1451- relname ,blkno ,offnum ,HeapTupleHeaderGetXmax (tuple .t_data ))));
1490+ if (do_shrinking )
1491+ ereport (LOG ,
1492+ (errmsg ("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- cannot shrink relation" ,
1493+ relname ,blkno ,offnum ,
1494+ HeapTupleHeaderGetXmax (tuple .t_data ))));
14521495do_shrinking = false;
14531496break ;
14541497default :
@@ -1810,19 +1853,21 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
18101853 * VACUUM FULL has an exclusive lock on the relation. So
18111854 * normally no other transaction can have pending INSERTs or
18121855 * DELETEs in this relation. A tuple is either:
1813- *(a) a tuple in a system catalog, inserted or deleted
1814- *by a not yet committed transaction
1856+ *(a) live (XMIN_COMMITTED)
18151857 *(b) known dead (XMIN_INVALID, or XMAX_COMMITTED and xmax
18161858 *is visible to all active transactions)
1817- *(c) inserted by a committed xact (XMIN_COMMITTED)
1818- *(d) moved by the currently running VACUUM.
1819- *(e) deleted (XMAX_COMMITTED) but at least one active
1820- *transaction does not see the deleting transaction
1821- * In case (a) we wouldn't be in repair_frag() at all.
1822- * In case (b) we cannot be here, because scan_heap() has
1823- * already marked the item as unused, see continue above. Case
1824- * (c) is what normally is to be expected. Case (d) is only
1825- * possible, if a whole tuple chain has been moved while
1859+ *(c) inserted and deleted (XMIN_COMMITTED+XMAX_COMMITTED)
1860+ *but at least one active transaction does not see the
1861+ *deleting transaction (ie, it's RECENTLY_DEAD)
1862+ *(d) moved by the currently running VACUUM
1863+ *(e) inserted or deleted by a not yet committed transaction,
1864+ *or by a transaction we couldn't set XMIN_COMMITTED for.
1865+ * In case (e) we wouldn't be in repair_frag() at all, because
1866+ * scan_heap() detects those cases and shuts off shrinking.
1867+ * We can't see case (b) here either, because such tuples were
1868+ * already removed by vacuum_page(). Cases (a) and (c) are
1869+ * normal and will have XMIN_COMMITTED set. Case (d) is only
1870+ * possible if a whole tuple chain has been moved while
18261871 * processing this or a higher numbered block.
18271872 * ---
18281873 */
@@ -1997,16 +2042,22 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
19972042ReleaseBuffer (nextBuf );
19982043break ;
19992044}
2000- /* must check for DEAD or MOVED_IN tuple, too */
2045+ /*
2046+ * Must check for DEAD or MOVED_IN tuple, too. This
2047+ * could potentially update hint bits, so we'd better
2048+ * hold the buffer content lock.
2049+ */
2050+ LockBuffer (nextBuf ,BUFFER_LOCK_SHARE );
20012051nextTstatus = HeapTupleSatisfiesVacuum (nextTdata ,
20022052OldestXmin ,
20032053nextBuf );
20042054if (nextTstatus == HEAPTUPLE_DEAD ||
20052055nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS )
20062056{
2007- ReleaseBuffer (nextBuf );
2057+ UnlockReleaseBuffer (nextBuf );
20082058break ;
20092059}
2060+ LockBuffer (nextBuf ,BUFFER_LOCK_UNLOCK );
20102061/* if it's MOVED_OFF we shoulda moved this one with it */
20112062if (nextTstatus == HEAPTUPLE_DELETE_IN_PROGRESS )
20122063elog (ERROR ,"updated tuple is already HEAP_MOVED_OFF" );