4242 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
4343 * Portions Copyright (c) 1994, Regents of the University of California
4444 *
45- * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.9 2005/10/15 02:49:09 momjian Exp $
45+ * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.10 2005/10/28 17:27:29 tgl Exp $
4646 *
4747 *-------------------------------------------------------------------------
4848 */
@@ -631,7 +631,16 @@ CreateMultiXactId(int nxids, TransactionId *xids)
631631}
632632
633633/*
634- * OK, assign the MXID and offsets range to use
634+ * Critical section from here until we've written the data; we don't
635+ * want to error out with a partly written MultiXact structure.
636+ * (In particular, failing to write our start offset after advancing
637+ * nextMXact would effectively corrupt the previous MultiXact.)
638+ */
639+ START_CRIT_SECTION ();
640+
641+ /*
642+ * Assign the MXID and offsets range to use, and make sure there is
643+ * space in the OFFSETs and MEMBERs files.
635644 */
636645multi = GetNewMultiXactId (nxids ,& offset );
637646
@@ -668,6 +677,9 @@ CreateMultiXactId(int nxids, TransactionId *xids)
668677/* Now enter the information into the OFFSETs and MEMBERs logs */
669678RecordNewMultiXact (multi ,offset ,nxids ,xids );
670679
680+ /* Done with critical section */
681+ END_CRIT_SECTION ();
682+
671683/* Store the new MultiXactId in the local cache, too */
672684mXactCachePut (multi ,nxids ,xids );
673685
@@ -761,6 +773,7 @@ static MultiXactId
761773GetNewMultiXactId (int nxids ,MultiXactOffset * offset )
762774{
763775MultiXactId result ;
776+ MultiXactOffset nextOffset ;
764777
765778debug_elog3 (DEBUG2 ,"GetNew: for %d xids" ,nxids );
766779
@@ -784,19 +797,28 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
784797 * Advance counter. As in GetNewTransactionId(), this must not happen
785798 * until after ExtendMultiXactOffset has succeeded!
786799 *
787- * We don't care about MultiXactId wraparound here; it will be handled by the
788- * next iteration.But note that nextMXact may be InvalidMultiXactId
800+ * We don't care about MultiXactId wraparound here; it will be handled by
801+ *the next iteration.But note that nextMXact may be InvalidMultiXactId
789802 * after this routine exits, so anyone else looking at the variable must
790803 * be prepared to deal with that.
791804 */
792805(MultiXactState -> nextMXact )++ ;
793806
794807/*
795- * Reserve the members space. Same considerations as above.
808+ * Reserve the members space. Same considerations as above. Also, be
809+ * careful not to return zero as the starting offset for any multixact.
810+ * See GetMultiXactIdMembers() for motivation.
796811 */
797- * offset = MultiXactState -> nextOffset ;
812+ nextOffset = MultiXactState -> nextOffset ;
813+ if (nextOffset == 0 )
814+ {
815+ * offset = 1 ;
816+ nxids ++ ;/* allocate member slot 0 too */
817+ }
818+ else
819+ * offset = nextOffset ;
798820
799- ExtendMultiXactMember (* offset ,nxids );
821+ ExtendMultiXactMember (nextOffset ,nxids );
800822
801823MultiXactState -> nextOffset += nxids ;
802824
@@ -824,6 +846,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
824846MultiXactOffset * offptr ;
825847MultiXactOffset offset ;
826848int length ;
849+ int truelength ;
827850int i ;
828851MultiXactId nextMXact ;
829852MultiXactId tmpMXact ;
@@ -849,13 +872,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
849872/*
850873 * We check known limits on MultiXact before resorting to the SLRU area.
851874 *
852- * An ID older than our OldestVisibleMXactId[] entry can't possibly still be
853- * running, and we'd run the risk of trying to read already-truncated SLRU
854- * data if we did try to examine it.
875+ * An ID older than our OldestVisibleMXactId[] entry can't possibly still
876+ *be running, and we'd run the risk of trying to read already-truncated
877+ *SLRU data if we did try to examine it.
855878 *
856- * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is seen,
857- * it implies undetected ID wraparound has occurred. We just silently
858- * assume that such an ID is no longer running.
879+ * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
880+ *seen, it implies undetected ID wraparound has occurred. We just
881+ *silently assume that such an ID is no longer running.
859882 *
860883 * Shared lock is enough here since we aren't modifying any global state.
861884 * Also, we can examine our own OldestVisibleMXactId without the lock,
@@ -868,27 +891,58 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
868891return -1 ;
869892}
870893
894+ /*
895+ * Acquire the shared lock just long enough to grab the current counter
896+ * values. We may need both nextMXact and nextOffset; see below.
897+ */
871898LWLockAcquire (MultiXactGenLock ,LW_SHARED );
872899
873- if (!MultiXactIdPrecedes (multi ,MultiXactState -> nextMXact ))
900+ nextMXact = MultiXactState -> nextMXact ;
901+ nextOffset = MultiXactState -> nextOffset ;
902+
903+ LWLockRelease (MultiXactGenLock );
904+
905+ if (!MultiXactIdPrecedes (multi ,nextMXact ))
874906{
875- LWLockRelease (MultiXactGenLock );
876907debug_elog2 (DEBUG2 ,"GetMembers: it's too new!" );
877908* xids = NULL ;
878909return -1 ;
879910}
880911
881912/*
882- * Before releasing the lock, save the current counter values, because the
883- * target MultiXactId may be just one less than nextMXact.We will need
884- * to use nextOffset as the endpoint if so.
913+ * Find out the offset at which we need to start reading MultiXactMembers
914+ * and the number of members in the multixact. We determine the latter
915+ * as the difference between this multixact's starting offset and the
916+ * next one's. However, there are some corner cases to worry about:
917+ *
918+ * 1. This multixact may be the latest one created, in which case there
919+ * is no next one to look at. In this case the nextOffset value we just
920+ * saved is the correct endpoint.
921+ *
922+ * 2. The next multixact may still be in process of being filled in:
923+ * that is, another process may have done GetNewMultiXactId but not yet
924+ * written the offset entry for that ID. In that scenario, it is
925+ * guaranteed that the offset entry for that multixact exists (because
926+ * GetNewMultiXactId won't release MultiXactGenLock until it does)
927+ * but contains zero (because we are careful to pre-zero offset pages).
928+ * Because GetNewMultiXactId will never return zero as the starting offset
929+ * for a multixact, when we read zero as the next multixact's offset, we
930+ * know we have this case. We sleep for a bit and try again.
931+ *
932+ * 3. Because GetNewMultiXactId increments offset zero to offset one
933+ * to handle case #2, there is an ambiguity near the point of offset
934+ * wraparound. If we see next multixact's offset is one, is that our
935+ * multixact's actual endpoint, or did it end at zero with a subsequent
936+ * increment? We handle this using the knowledge that if the zero'th
937+ * member slot wasn't filled, it'll contain zero, and zero isn't a valid
938+ * transaction ID so it can't be a multixact member. Therefore, if we
939+ * read a zero from the members array, just ignore it.
940+ *
941+ * This is all pretty messy, but the mess occurs only in infrequent corner
942+ * cases, so it seems better than holding the MultiXactGenLock for a long
943+ * time on every multixact creation.
885944 */
886- nextMXact = MultiXactState -> nextMXact ;
887- nextOffset = MultiXactState -> nextOffset ;
888-
889- LWLockRelease (MultiXactGenLock );
890-
891- /* Get the offset at which we need to start reading MultiXactMembers */
945+ retry :
892946LWLockAcquire (MultiXactOffsetControlLock ,LW_EXCLUSIVE );
893947
894948pageno = MultiXactIdToOffsetPage (multi );
@@ -899,20 +953,23 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
899953offptr += entryno ;
900954offset = * offptr ;
901955
956+ Assert (offset != 0 );
957+
902958/*
903- * How many members do we need to read? If we are at the end of the
904- * assigned MultiXactIds, use the offset just saved above.Else we need
905- * to check the MultiXactId following ours.
906- *
907- * Use the same increment rule as GetNewMultiXactId(), that is, don't handle
908- * wraparound explicitly until needed.
959+ * Use the same increment rule as GetNewMultiXactId(), that is, don't
960+ * handle wraparound explicitly until needed.
909961 */
910962tmpMXact = multi + 1 ;
911963
912964if (nextMXact == tmpMXact )
965+ {
966+ /* Corner case 1: there is no next multixact */
913967length = nextOffset - offset ;
968+ }
914969else
915970{
971+ MultiXactOffset nextMXOffset ;
972+
916973/* handle wraparound if needed */
917974if (tmpMXact < FirstMultiXactId )
918975tmpMXact = FirstMultiXactId ;
@@ -927,7 +984,17 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
927984
928985offptr = (MultiXactOffset * )MultiXactOffsetCtl -> shared -> page_buffer [slotno ];
929986offptr += entryno ;
930- length = * offptr - offset ;
987+ nextMXOffset = * offptr ;
988+
989+ if (nextMXOffset == 0 )
990+ {
991+ /* Corner case 2: next multixact is still being filled in */
992+ LWLockRelease (MultiXactOffsetControlLock );
993+ pg_usleep (1000L );
994+ gotoretry ;
995+ }
996+
997+ length = nextMXOffset - offset ;
931998}
932999
9331000LWLockRelease (MultiXactOffsetControlLock );
@@ -938,6 +1005,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
9381005/* Now get the members themselves. */
9391006LWLockAcquire (MultiXactMemberControlLock ,LW_EXCLUSIVE );
9401007
1008+ truelength = 0 ;
9411009prev_pageno = -1 ;
9421010for (i = 0 ;i < length ;i ++ ,offset ++ )
9431011{
@@ -956,19 +1024,26 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
9561024MultiXactMemberCtl -> shared -> page_buffer [slotno ];
9571025xactptr += entryno ;
9581026
959- ptr [i ]= * xactptr ;
1027+ if (!TransactionIdIsValid (* xactptr ))
1028+ {
1029+ /* Corner case 3: we must be looking at unused slot zero */
1030+ Assert (offset == 0 );
1031+ continue ;
1032+ }
1033+
1034+ ptr [truelength ++ ]= * xactptr ;
9601035}
9611036
9621037LWLockRelease (MultiXactMemberControlLock );
9631038
9641039/*
9651040 * Copy the result into the local cache.
9661041 */
967- mXactCachePut (multi ,length ,ptr );
1042+ mXactCachePut (multi ,truelength ,ptr );
9681043
9691044debug_elog3 (DEBUG2 ,"GetMembers: no cache for %s" ,
970- mxid_to_string (multi ,length ,ptr ));
971- return length ;
1045+ mxid_to_string (multi ,truelength ,ptr ));
1046+ return truelength ;
9721047}
9731048
9741049/*