1010 * the passed-in buffer. The caller must hold not only a pin, but at least
1111 * shared buffer content lock on the buffer containing the tuple.
1212 *
13- * NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array)
13+ * NOTE: When using a non-MVCC snapshot, we must check
14+ * TransactionIdIsInProgress (which looks in the PGXACT array)
1415 * before TransactionIdDidCommit/TransactionIdDidAbort (which look in
1516 * pg_clog). Otherwise we have a race condition: we might decide that a
1617 * just-committed transaction crashed, because none of the tests succeed.
1718 * xact.c is careful to record commit/abort in pg_clog before it unsets
18- * MyPgXact->xid in PGXACT array. That fixes that problem, but it also
19- * means there is a window where TransactionIdIsInProgress and
19+ * MyPgXact->xid in the PGXACT array. That fixes that problem, but it
20+ * also means there is a window where TransactionIdIsInProgress and
2021 * TransactionIdDidCommit will both return true. If we check only
2122 * TransactionIdDidCommit, we could consider a tuple committed when a
2223 * later GetSnapshotData call will still think the originating transaction
2627 * subtransactions of our own main transaction and so there can't be any
2728 * race condition.
2829 *
30+ * When using an MVCC snapshot, we rely on XidInMVCCSnapshot rather than
31+ * TransactionIdIsInProgress, but the logic is otherwise the same: do not
32+ * check pg_clog until after deciding that the xact is no longer in progress.
33+ *
34+ *
2935 * Summary of visibility functions:
3036 *
3137 * HeapTupleSatisfiesMVCC()
@@ -936,9 +942,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
936942 *transactions started after the snapshot was taken
937943 *changes made by the current command
938944 *
939- * (Notice, however, that the tuple status hint bits will be updated on the
940- * basis of the true state of the transaction, even if we then pretend we
941- * can't see it.)
945+ * Notice that here, we will not update the tuple status hint bits if the
946+ * inserting/deleting transaction is still running according to our snapshot,
947+ * even if in reality it's committed or aborted by now. This is intentional.
948+ * Checking the true transaction state would require access to high-traffic
949+ * shared data structures, creating contention we'd rather do without, and it
950+ * would not change the result of our visibility check anyway. The hint bits
951+ * will be updated by the first visitor that has a snapshot new enough to see
952+ * the inserting/deleting transaction as done. In the meantime, the cost of
953+ * leaving the hint bits unset is basically that each HeapTupleSatisfiesMVCC
954+ * call will need to run TransactionIdIsCurrentTransactionId in addition to
955+ * XidInMVCCSnapshot (but it would have to do the latter anyway). In the old
956+ * coding where we tried to set the hint bits as soon as possible, we instead
957+ * did TransactionIdIsInProgress in each call --- to no avail, as long as the
958+ * inserting/deleting transaction was still running --- which was more cycles
959+ * and more contention on the PGXACT array.
942960 */
943961bool
944962HeapTupleSatisfiesMVCC (HeapTuple htup ,Snapshot snapshot ,
@@ -961,7 +979,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
961979
962980if (TransactionIdIsCurrentTransactionId (xvac ))
963981return false;
964- if (!TransactionIdIsInProgress (xvac ))
982+ if (!XidInMVCCSnapshot (xvac , snapshot ))
965983{
966984if (TransactionIdDidCommit (xvac ))
967985{
@@ -980,7 +998,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
980998
981999if (!TransactionIdIsCurrentTransactionId (xvac ))
9821000{
983- if (TransactionIdIsInProgress (xvac ))
1001+ if (XidInMVCCSnapshot (xvac , snapshot ))
9841002return false;
9851003if (TransactionIdDidCommit (xvac ))
9861004SetHintBits (tuple ,buffer ,HEAP_XMIN_COMMITTED ,
@@ -1035,7 +1053,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10351053else
10361054return false;/* deleted before scan started */
10371055}
1038- else if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmin (tuple )))
1056+ else if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
10391057return false;
10401058else if (TransactionIdDidCommit (HeapTupleHeaderGetRawXmin (tuple )))
10411059SetHintBits (tuple ,buffer ,HEAP_XMIN_COMMITTED ,
@@ -1048,14 +1066,15 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10481066return false;
10491067}
10501068}
1069+ else
1070+ {
1071+ /* xmin is committed, but maybe not according to our snapshot */
1072+ if (!HeapTupleHeaderXminFrozen (tuple )&&
1073+ XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ),snapshot ))
1074+ return false;/* treat as still in progress */
1075+ }
10511076
1052- /*
1053- * By here, the inserting transaction has committed - have to check
1054- * when...
1055- */
1056- if (!HeapTupleHeaderXminFrozen (tuple )
1057- && XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ),snapshot ))
1058- return false;/* treat as still in progress */
1077+ /* by here, the inserting transaction has committed */
10591078
10601079if (tuple -> t_infomask & HEAP_XMAX_INVALID )/* xid invalid or aborted */
10611080return true;
@@ -1082,15 +1101,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10821101else
10831102return false;/* deleted before scan started */
10841103}
1085- if (TransactionIdIsInProgress (xmax ))
1104+ if (XidInMVCCSnapshot (xmax , snapshot ))
10861105return true;
10871106if (TransactionIdDidCommit (xmax ))
1088- {
1089- /* updating transaction committed, but when? */
1090- if (XidInMVCCSnapshot (xmax ,snapshot ))
1091- return true;/* treat as still in progress */
1092- return false;
1093- }
1107+ return false;/* updating transaction committed */
10941108/* it must have aborted or crashed */
10951109return true;
10961110}
@@ -1105,7 +1119,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
11051119return false;/* deleted before scan started */
11061120}
11071121
1108- if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmax (tuple )))
1122+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
11091123return true;
11101124
11111125if (!TransactionIdDidCommit (HeapTupleHeaderGetRawXmax (tuple )))
@@ -1120,12 +1134,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
11201134SetHintBits (tuple ,buffer ,HEAP_XMAX_COMMITTED ,
11211135HeapTupleHeaderGetRawXmax (tuple ));
11221136}
1137+ else
1138+ {
1139+ /* xmax is committed, but maybe not according to our snapshot */
1140+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ),snapshot ))
1141+ return true;/* treat as still in progress */
1142+ }
11231143
1124- /*
1125- * OK, the deleting transaction committed too ... but when?
1126- */
1127- if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ),snapshot ))
1128- return true;/* treat as still in progress */
1144+ /* xmax transaction committed */
11291145
11301146return false;
11311147}
@@ -1383,14 +1399,15 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
13831399/*
13841400 * HeapTupleIsSurelyDead
13851401 *
1386- *Determine whether a tuple is surely dead. We sometimes use this
1387- *in lieu of HeapTupleSatisifesVacuum when the tuple has just been
1388- *tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
1389- *can be set should already be set. We assume that if no hint bits
1390- *either for xmin or xmax, the transaction is still running. This is
1391- *therefore faster than HeapTupleSatisfiesVacuum, because we don't
1392- *consult CLOG (and also because we don't need to give an exact answer,
1393- *just whether or not the tuple is surely dead).
1402+ *Cheaply determine whether a tuple is surely dead to all onlookers.
1403+ *We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
1404+ *tuple has just been tested by another visibility routine (usually
1405+ *HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
1406+ *should already be set. We assume that if no hint bits are set, the xmin
1407+ *or xmax transaction is still running. This is therefore faster than
1408+ *HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
1409+ *It's okay to return FALSE when in doubt, but we must return TRUE only
1410+ *if the tuple is removable.
13941411 */
13951412bool
13961413HeapTupleIsSurelyDead (HeapTuple htup ,TransactionId OldestXmin )
@@ -1443,8 +1460,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
14431460 *
14441461 * Note: GetSnapshotData never stores either top xid or subxids of our own
14451462 * backend into a snapshot, so these xids will not be reported as "running"
1446- * by this function. This is OK for current uses, because we actually only
1447- * apply this for known-committed XIDs.
1463+ * by this function. This is OK for current uses, because we always check
1464+ * TransactionIdIsCurrentTransactionId first, except for known-committed
1465+ * XIDs which could not be ours anyway.
14481466 */
14491467static bool
14501468XidInMVCCSnapshot (TransactionId xid ,Snapshot snapshot )
@@ -1481,7 +1499,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
14811499 */
14821500if (!snapshot -> suboverflowed )
14831501{
1484- /* full data, so search subxip */
1502+ /* we have full data, so search subxip */
14851503int32 j ;
14861504
14871505for (j = 0 ;j < snapshot -> subxcnt ;j ++ )
@@ -1494,7 +1512,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
14941512}
14951513else
14961514{
1497- /* overflowed, so convert xid to top-level */
1515+ /*
1516+ * Snapshot overflowed, so convert xid to top-level. This is safe
1517+ * because we eliminated too-old XIDs above.
1518+ */
14981519xid = SubTransGetTopmostTransaction (xid );
14991520
15001521/*
@@ -1525,7 +1546,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
15251546 */
15261547if (snapshot -> suboverflowed )
15271548{
1528- /* overflowed, so convert xid to top-level */
1549+ /*
1550+ * Snapshot overflowed, so convert xid to top-level. This is safe
1551+ * because we eliminated too-old XIDs above.
1552+ */
15291553xid = SubTransGetTopmostTransaction (xid );
15301554
15311555/*