Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1986ca5

Browse files
committed
Fix race condition in multixact code: it's possible to try to read a
multixact's starting offset before the offset has been stored into theSLRU file. A simple fix would be to hold the MultiXactGenLock until theoffset has been stored, but that looks like a big concurrency hit. Insteadrely on knowledge that unset offsets will be zero, and loop when we seea zero. This requires a little extra hacking to ensure that zero is nevera valid value for the offset. Problem reported by Matteo Beccati, fixideas from Martijn van Oosterhout, Alvaro Herrera, and Tom Lane.
1 parent21b748e commit1986ca5

File tree

1 file changed

+110
-35
lines changed

1 file changed

+110
-35
lines changed

‎src/backend/access/transam/multixact.c

Lines changed: 110 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
4343
* Portions Copyright (c) 1994, Regents of the University of California
4444
*
45-
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.9 2005/10/15 02:49:09 momjian Exp $
45+
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.10 2005/10/28 17:27:29 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -631,7 +631,16 @@ CreateMultiXactId(int nxids, TransactionId *xids)
631631
}
632632

633633
/*
634-
* OK, assign the MXID and offsets range to use
634+
* Critical section from here until we've written the data; we don't
635+
* want to error out with a partly written MultiXact structure.
636+
* (In particular, failing to write our start offset after advancing
637+
* nextMXact would effectively corrupt the previous MultiXact.)
638+
*/
639+
START_CRIT_SECTION();
640+
641+
/*
642+
* Assign the MXID and offsets range to use, and make sure there is
643+
* space in the OFFSETs and MEMBERs files.
635644
*/
636645
multi=GetNewMultiXactId(nxids,&offset);
637646

@@ -668,6 +677,9 @@ CreateMultiXactId(int nxids, TransactionId *xids)
668677
/* Now enter the information into the OFFSETs and MEMBERs logs */
669678
RecordNewMultiXact(multi,offset,nxids,xids);
670679

680+
/* Done with critical section */
681+
END_CRIT_SECTION();
682+
671683
/* Store the new MultiXactId in the local cache, too */
672684
mXactCachePut(multi,nxids,xids);
673685

@@ -761,6 +773,7 @@ static MultiXactId
761773
GetNewMultiXactId(intnxids,MultiXactOffset*offset)
762774
{
763775
MultiXactIdresult;
776+
MultiXactOffsetnextOffset;
764777

765778
debug_elog3(DEBUG2,"GetNew: for %d xids",nxids);
766779

@@ -784,19 +797,28 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
784797
* Advance counter. As in GetNewTransactionId(), this must not happen
785798
* until after ExtendMultiXactOffset has succeeded!
786799
*
787-
* We don't care about MultiXactId wraparound here; it will be handled by the
788-
* next iteration.But note that nextMXact may be InvalidMultiXactId
800+
* We don't care about MultiXactId wraparound here; it will be handled by
801+
*thenext iteration.But note that nextMXact may be InvalidMultiXactId
789802
* after this routine exits, so anyone else looking at the variable must
790803
* be prepared to deal with that.
791804
*/
792805
(MultiXactState->nextMXact)++;
793806

794807
/*
795-
* Reserve the members space. Same considerations as above.
808+
* Reserve the members space. Same considerations as above. Also, be
809+
* careful not to return zero as the starting offset for any multixact.
810+
* See GetMultiXactIdMembers() for motivation.
796811
*/
797-
*offset=MultiXactState->nextOffset;
812+
nextOffset=MultiXactState->nextOffset;
813+
if (nextOffset==0)
814+
{
815+
*offset=1;
816+
nxids++;/* allocate member slot 0 too */
817+
}
818+
else
819+
*offset=nextOffset;
798820

799-
ExtendMultiXactMember(*offset,nxids);
821+
ExtendMultiXactMember(nextOffset,nxids);
800822

801823
MultiXactState->nextOffset+=nxids;
802824

@@ -824,6 +846,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
824846
MultiXactOffset*offptr;
825847
MultiXactOffsetoffset;
826848
intlength;
849+
inttruelength;
827850
inti;
828851
MultiXactIdnextMXact;
829852
MultiXactIdtmpMXact;
@@ -849,13 +872,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
849872
/*
850873
* We check known limits on MultiXact before resorting to the SLRU area.
851874
*
852-
* An ID older than our OldestVisibleMXactId[] entry can't possibly still be
853-
* running, and we'd run the risk of trying to read already-truncated SLRU
854-
* data if we did try to examine it.
875+
* An ID older than our OldestVisibleMXactId[] entry can't possibly still
876+
*berunning, and we'd run the risk of trying to read already-truncated
877+
*SLRUdata if we did try to examine it.
855878
*
856-
* Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is seen,
857-
* it implies undetected ID wraparound has occurred. We just silently
858-
* assume that such an ID is no longer running.
879+
* Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
880+
*seen,it implies undetected ID wraparound has occurred. We just
881+
*silentlyassume that such an ID is no longer running.
859882
*
860883
* Shared lock is enough here since we aren't modifying any global state.
861884
* Also, we can examine our own OldestVisibleMXactId without the lock,
@@ -868,27 +891,58 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
868891
return-1;
869892
}
870893

894+
/*
895+
* Acquire the shared lock just long enough to grab the current counter
896+
* values. We may need both nextMXact and nextOffset; see below.
897+
*/
871898
LWLockAcquire(MultiXactGenLock,LW_SHARED);
872899

873-
if (!MultiXactIdPrecedes(multi,MultiXactState->nextMXact))
900+
nextMXact=MultiXactState->nextMXact;
901+
nextOffset=MultiXactState->nextOffset;
902+
903+
LWLockRelease(MultiXactGenLock);
904+
905+
if (!MultiXactIdPrecedes(multi,nextMXact))
874906
{
875-
LWLockRelease(MultiXactGenLock);
876907
debug_elog2(DEBUG2,"GetMembers: it's too new!");
877908
*xids=NULL;
878909
return-1;
879910
}
880911

881912
/*
882-
* Before releasing the lock, save the current counter values, because the
883-
* target MultiXactId may be just one less than nextMXact.We will need
884-
* to use nextOffset as the endpoint if so.
913+
* Find out the offset at which we need to start reading MultiXactMembers
914+
* and the number of members in the multixact. We determine the latter
915+
* as the difference between this multixact's starting offset and the
916+
* next one's. However, there are some corner cases to worry about:
917+
*
918+
* 1. This multixact may be the latest one created, in which case there
919+
* is no next one to look at. In this case the nextOffset value we just
920+
* saved is the correct endpoint.
921+
*
922+
* 2. The next multixact may still be in process of being filled in:
923+
* that is, another process may have done GetNewMultiXactId but not yet
924+
* written the offset entry for that ID. In that scenario, it is
925+
* guaranteed that the offset entry for that multixact exists (because
926+
* GetNewMultiXactId won't release MultiXactGenLock until it does)
927+
* but contains zero (because we are careful to pre-zero offset pages).
928+
* Because GetNewMultiXactId will never return zero as the starting offset
929+
* for a multixact, when we read zero as the next multixact's offset, we
930+
* know we have this case. We sleep for a bit and try again.
931+
*
932+
* 3. Because GetNewMultiXactId increments offset zero to offset one
933+
* to handle case #2, there is an ambiguity near the point of offset
934+
* wraparound. If we see next multixact's offset is one, is that our
935+
* multixact's actual endpoint, or did it end at zero with a subsequent
936+
* increment? We handle this using the knowledge that if the zero'th
937+
* member slot wasn't filled, it'll contain zero, and zero isn't a valid
938+
* transaction ID so it can't be a multixact member. Therefore, if we
939+
* read a zero from the members array, just ignore it.
940+
*
941+
* This is all pretty messy, but the mess occurs only in infrequent corner
942+
* cases, so it seems better than holding the MultiXactGenLock for a long
943+
* time on every multixact creation.
885944
*/
886-
nextMXact=MultiXactState->nextMXact;
887-
nextOffset=MultiXactState->nextOffset;
888-
889-
LWLockRelease(MultiXactGenLock);
890-
891-
/* Get the offset at which we need to start reading MultiXactMembers */
945+
retry:
892946
LWLockAcquire(MultiXactOffsetControlLock,LW_EXCLUSIVE);
893947

894948
pageno=MultiXactIdToOffsetPage(multi);
@@ -899,20 +953,23 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
899953
offptr+=entryno;
900954
offset=*offptr;
901955

956+
Assert(offset!=0);
957+
902958
/*
903-
* How many members do we need to read? If we are at the end of the
904-
* assigned MultiXactIds, use the offset just saved above.Else we need
905-
* to check the MultiXactId following ours.
906-
*
907-
* Use the same increment rule as GetNewMultiXactId(), that is, don't handle
908-
* wraparound explicitly until needed.
959+
* Use the same increment rule as GetNewMultiXactId(), that is, don't
960+
* handle wraparound explicitly until needed.
909961
*/
910962
tmpMXact=multi+1;
911963

912964
if (nextMXact==tmpMXact)
965+
{
966+
/* Corner case 1: there is no next multixact */
913967
length=nextOffset-offset;
968+
}
914969
else
915970
{
971+
MultiXactOffsetnextMXOffset;
972+
916973
/* handle wraparound if needed */
917974
if (tmpMXact<FirstMultiXactId)
918975
tmpMXact=FirstMultiXactId;
@@ -927,7 +984,17 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
927984

928985
offptr= (MultiXactOffset*)MultiXactOffsetCtl->shared->page_buffer[slotno];
929986
offptr+=entryno;
930-
length=*offptr-offset;
987+
nextMXOffset=*offptr;
988+
989+
if (nextMXOffset==0)
990+
{
991+
/* Corner case 2: next multixact is still being filled in */
992+
LWLockRelease(MultiXactOffsetControlLock);
993+
pg_usleep(1000L);
994+
gotoretry;
995+
}
996+
997+
length=nextMXOffset-offset;
931998
}
932999

9331000
LWLockRelease(MultiXactOffsetControlLock);
@@ -938,6 +1005,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
9381005
/* Now get the members themselves. */
9391006
LWLockAcquire(MultiXactMemberControlLock,LW_EXCLUSIVE);
9401007

1008+
truelength=0;
9411009
prev_pageno=-1;
9421010
for (i=0;i<length;i++,offset++)
9431011
{
@@ -956,19 +1024,26 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
9561024
MultiXactMemberCtl->shared->page_buffer[slotno];
9571025
xactptr+=entryno;
9581026

959-
ptr[i]=*xactptr;
1027+
if (!TransactionIdIsValid(*xactptr))
1028+
{
1029+
/* Corner case 3: we must be looking at unused slot zero */
1030+
Assert(offset==0);
1031+
continue;
1032+
}
1033+
1034+
ptr[truelength++]=*xactptr;
9601035
}
9611036

9621037
LWLockRelease(MultiXactMemberControlLock);
9631038

9641039
/*
9651040
* Copy the result into the local cache.
9661041
*/
967-
mXactCachePut(multi,length,ptr);
1042+
mXactCachePut(multi,truelength,ptr);
9681043

9691044
debug_elog3(DEBUG2,"GetMembers: no cache for %s",
970-
mxid_to_string(multi,length,ptr));
971-
returnlength;
1045+
mxid_to_string(multi,truelength,ptr));
1046+
returntruelength;
9721047
}
9731048

9741049
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp