Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf868a81

Browse files
committed
Fix longstanding recursion hazard in sinval message processing.
LockRelationOid and sibling routines supposed that, if our session alreadyholds the lock they were asked to acquire, they could skip callingAcceptInvalidationMessages on the grounds that we must have already readany remote sinval messages issued against the relation being locked.This is normally true, but there's a critical special case where it's not:processing inside AcceptInvalidationMessages might attempt to access systemrelations, resulting in a recursive call to acquire a relation lock.Hence, if the outer call had acquired that same system catalog lock, we'dfall through, despite the possibility that there's an as-yet-unread sinvalmessage for that system catalog. This could, for example, result infailure to access a system catalog or index that had just been processedby VACUUM FULL. This is the explanation for buildfarm failures we've beenseeing intermittently for the past three months. The bug is far olderthan that, but commitsa54e1f1 et al added a new recursion case withinAcceptInvalidationMessages that is apparently easier to hit than anyprevious case.To fix this, we must not skip calling AcceptInvalidationMessages untilwe have *finished* a call to it since acquiring a relation lock, notmerely acquired the lock. (There's already adequate logic insideAcceptInvalidationMessages to deal with being called recursively.)Fortunately, we can implement that at trivial cost, by adding a flagto LOCALLOCK hashtable entries that tracks whether we know we havecompleted such a call.There is an API hazard added by this patch for external callers ofLockAcquire: if anything is testing for LOCKACQUIRE_ALREADY_HELD,it might be fooled by the new return code LOCKACQUIRE_ALREADY_CLEARinto thinking the lock wasn't already held. This should be a fail-softcondition, though, unless something very bizarre is being done inresponse to the test.Also, I added an additional output argument to LockAcquireExtended,assuming that that probably isn't called by any outside code giventhe very limited usefulness of its additional functionality.Back-patch to all supported branches.Discussion:https://postgr.es/m/12259.1532117714@sss.pgh.pa.us
1 parent8582b4d commitf868a81

File tree

4 files changed

+92
-16
lines changed

4 files changed

+92
-16
lines changed

‎src/backend/storage/ipc/standby.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,8 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
661661

662662
SET_LOCKTAG_RELATION(locktag,newlock->dbOid,newlock->relOid);
663663

664-
LockAcquireExtended(&locktag,AccessExclusiveLock, true, false, false);
664+
(void)LockAcquireExtended(&locktag,AccessExclusiveLock, true, false,
665+
false,NULL);
665666
}
666667

667668
staticvoid

‎src/backend/storage/lmgr/lmgr.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,12 @@ void
105105
LockRelationOid(Oidrelid,LOCKMODElockmode)
106106
{
107107
LOCKTAGtag;
108+
LOCALLOCK*locallock;
108109
LockAcquireResultres;
109110

110111
SetLocktagRelationOid(&tag,relid);
111112

112-
res=LockAcquire(&tag,lockmode, false, false);
113+
res=LockAcquireExtended(&tag,lockmode, false, false, true,&locallock);
113114

114115
/*
115116
* Now that we have the lock, check for invalidation messages, so that we
@@ -120,9 +121,18 @@ LockRelationOid(Oid relid, LOCKMODE lockmode)
120121
* relcache entry in an undesirable way. (In the case where our own xact
121122
* modifies the rel, the relcache update happens via
122123
* CommandCounterIncrement, not here.)
124+
*
125+
* However, in corner cases where code acts on tables (usually catalogs)
126+
* recursively, we might get here while still processing invalidation
127+
* messages in some outer execution of this function or a sibling. The
128+
* "cleared" status of the lock tells us whether we really are done
129+
* absorbing relevant inval messages.
123130
*/
124-
if (res!=LOCKACQUIRE_ALREADY_HELD)
131+
if (res!=LOCKACQUIRE_ALREADY_CLEAR)
132+
{
125133
AcceptInvalidationMessages();
134+
MarkLockClear(locallock);
135+
}
126136
}
127137

128138
/*
@@ -138,11 +148,12 @@ bool
138148
ConditionalLockRelationOid(Oidrelid,LOCKMODElockmode)
139149
{
140150
LOCKTAGtag;
151+
LOCALLOCK*locallock;
141152
LockAcquireResultres;
142153

143154
SetLocktagRelationOid(&tag,relid);
144155

145-
res=LockAcquire(&tag,lockmode, false, true);
156+
res=LockAcquireExtended(&tag,lockmode, false, true, true,&locallock);
146157

147158
if (res==LOCKACQUIRE_NOT_AVAIL)
148159
return false;
@@ -151,8 +162,11 @@ ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
151162
* Now that we have the lock, check for invalidation messages; see notes
152163
* in LockRelationOid.
153164
*/
154-
if (res!=LOCKACQUIRE_ALREADY_HELD)
165+
if (res!=LOCKACQUIRE_ALREADY_CLEAR)
166+
{
155167
AcceptInvalidationMessages();
168+
MarkLockClear(locallock);
169+
}
156170

157171
return true;
158172
}
@@ -199,20 +213,24 @@ void
199213
LockRelation(Relationrelation,LOCKMODElockmode)
200214
{
201215
LOCKTAGtag;
216+
LOCALLOCK*locallock;
202217
LockAcquireResultres;
203218

204219
SET_LOCKTAG_RELATION(tag,
205220
relation->rd_lockInfo.lockRelId.dbId,
206221
relation->rd_lockInfo.lockRelId.relId);
207222

208-
res=LockAcquire(&tag,lockmode, false, false);
223+
res=LockAcquireExtended(&tag,lockmode, false, false, true,&locallock);
209224

210225
/*
211226
* Now that we have the lock, check for invalidation messages; see notes
212227
* in LockRelationOid.
213228
*/
214-
if (res!=LOCKACQUIRE_ALREADY_HELD)
229+
if (res!=LOCKACQUIRE_ALREADY_CLEAR)
230+
{
215231
AcceptInvalidationMessages();
232+
MarkLockClear(locallock);
233+
}
216234
}
217235

218236
/*
@@ -226,13 +244,14 @@ bool
226244
ConditionalLockRelation(Relationrelation,LOCKMODElockmode)
227245
{
228246
LOCKTAGtag;
247+
LOCALLOCK*locallock;
229248
LockAcquireResultres;
230249

231250
SET_LOCKTAG_RELATION(tag,
232251
relation->rd_lockInfo.lockRelId.dbId,
233252
relation->rd_lockInfo.lockRelId.relId);
234253

235-
res=LockAcquire(&tag,lockmode, false, true);
254+
res=LockAcquireExtended(&tag,lockmode, false, true, true,&locallock);
236255

237256
if (res==LOCKACQUIRE_NOT_AVAIL)
238257
return false;
@@ -241,8 +260,11 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
241260
* Now that we have the lock, check for invalidation messages; see notes
242261
* in LockRelationOid.
243262
*/
244-
if (res!=LOCKACQUIRE_ALREADY_HELD)
263+
if (res!=LOCKACQUIRE_ALREADY_CLEAR)
264+
{
245265
AcceptInvalidationMessages();
266+
MarkLockClear(locallock);
267+
}
246268

247269
return true;
248270
}

‎src/backend/storage/lmgr/lock.c

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
669669
*LOCKACQUIRE_NOT_AVAILlock not available, and dontWait=true
670670
*LOCKACQUIRE_OKlock successfully acquired
671671
*LOCKACQUIRE_ALREADY_HELDincremented count for lock already held
672+
*LOCKACQUIRE_ALREADY_CLEARincremented count for lock already clear
672673
*
673674
* In the normal case where dontWait=false and the caller doesn't need to
674675
* distinguish a freshly acquired lock from one already taken earlier in
@@ -685,7 +686,8 @@ LockAcquire(const LOCKTAG *locktag,
685686
boolsessionLock,
686687
booldontWait)
687688
{
688-
returnLockAcquireExtended(locktag,lockmode,sessionLock,dontWait, true);
689+
returnLockAcquireExtended(locktag,lockmode,sessionLock,dontWait,
690+
true,NULL);
689691
}
690692

691693
/*
@@ -696,13 +698,17 @@ LockAcquire(const LOCKTAG *locktag,
696698
* caller to note that the lock table is full and then begin taking
697699
* extreme action to reduce the number of other lock holders before
698700
* retrying the action.
701+
*
702+
* If locallockp isn't NULL, *locallockp receives a pointer to the LOCALLOCK
703+
* table entry if a lock is successfully acquired, or NULL if not.
699704
*/
700705
LockAcquireResult
701706
LockAcquireExtended(constLOCKTAG*locktag,
702707
LOCKMODElockmode,
703708
boolsessionLock,
704709
booldontWait,
705-
boolreportMemoryError)
710+
boolreportMemoryError,
711+
LOCALLOCK**locallockp)
706712
{
707713
LOCKMETHODIDlockmethodid=locktag->locktag_lockmethodid;
708714
LockMethodlockMethodTable;
@@ -766,9 +772,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
766772
locallock->proclock=NULL;
767773
locallock->hashcode=LockTagHashCode(&(localtag.lock));
768774
locallock->nLocks=0;
775+
locallock->holdsStrongLockCount= false;
776+
locallock->lockCleared= false;
769777
locallock->numLockOwners=0;
770778
locallock->maxLockOwners=8;
771-
locallock->holdsStrongLockCount= false;
772779
locallock->lockOwners=NULL;/* in case next line fails */
773780
locallock->lockOwners= (LOCALLOCKOWNER*)
774781
MemoryContextAlloc(TopMemoryContext,
@@ -789,13 +796,22 @@ LockAcquireExtended(const LOCKTAG *locktag,
789796
}
790797
hashcode=locallock->hashcode;
791798

799+
if (locallockp)
800+
*locallockp=locallock;
801+
792802
/*
793803
* If we already hold the lock, we can just increase the count locally.
804+
*
805+
* If lockCleared is already set, caller need not worry about absorbing
806+
* sinval messages related to the lock's object.
794807
*/
795808
if (locallock->nLocks>0)
796809
{
797810
GrantLockLocal(locallock,owner);
798-
returnLOCKACQUIRE_ALREADY_HELD;
811+
if (locallock->lockCleared)
812+
returnLOCKACQUIRE_ALREADY_CLEAR;
813+
else
814+
returnLOCKACQUIRE_ALREADY_HELD;
799815
}
800816

801817
/*
@@ -877,6 +893,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
877893
hashcode))
878894
{
879895
AbortStrongLockAcquire();
896+
if (locallock->nLocks==0)
897+
RemoveLocalLock(locallock);
898+
if (locallockp)
899+
*locallockp=NULL;
880900
if (reportMemoryError)
881901
ereport(ERROR,
882902
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -911,6 +931,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
911931
{
912932
AbortStrongLockAcquire();
913933
LWLockRelease(partitionLock);
934+
if (locallock->nLocks==0)
935+
RemoveLocalLock(locallock);
936+
if (locallockp)
937+
*locallockp=NULL;
914938
if (reportMemoryError)
915939
ereport(ERROR,
916940
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -976,6 +1000,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
9761000
LWLockRelease(partitionLock);
9771001
if (locallock->nLocks==0)
9781002
RemoveLocalLock(locallock);
1003+
if (locallockp)
1004+
*locallockp=NULL;
9791005
returnLOCKACQUIRE_NOT_AVAIL;
9801006
}
9811007

@@ -1645,6 +1671,20 @@ GrantAwaitedLock(void)
16451671
GrantLockLocal(awaitedLock,awaitedOwner);
16461672
}
16471673

1674+
/*
1675+
* MarkLockClear -- mark an acquired lock as "clear"
1676+
*
1677+
* This means that we know we have absorbed all sinval messages that other
1678+
* sessions generated before we acquired this lock, and so we can confidently
1679+
* assume we know about any catalog changes protected by this lock.
1680+
*/
1681+
void
1682+
MarkLockClear(LOCALLOCK*locallock)
1683+
{
1684+
Assert(locallock->nLocks>0);
1685+
locallock->lockCleared= true;
1686+
}
1687+
16481688
/*
16491689
* WaitOnLock -- wait to acquire a lock
16501690
*
@@ -1909,6 +1949,15 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
19091949
if (locallock->nLocks>0)
19101950
return true;
19111951

1952+
/*
1953+
* At this point we can no longer suppose we are clear of invalidation
1954+
* messages related to this lock. Although we'll delete the LOCALLOCK
1955+
* object before any intentional return from this routine, it seems worth
1956+
* the trouble to explicitly reset lockCleared right now, just in case
1957+
* some error prevents us from deleting the LOCALLOCK.
1958+
*/
1959+
locallock->lockCleared= false;
1960+
19121961
/* Attempt fast release of any lock eligible for the fast path. */
19131962
if (EligibleForRelationFastPath(locktag,lockmode)&&
19141963
FastPathLocalUseCount>0)

‎src/include/storage/lock.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,10 @@ typedef struct LOCALLOCK
408408
PROCLOCK*proclock;/* associated PROCLOCK object, if any */
409409
uint32hashcode;/* copy of LOCKTAG's hash value */
410410
int64nLocks;/* total number of times lock is held */
411+
boolholdsStrongLockCount;/* bumped FastPathStrongRelationLocks */
412+
boollockCleared;/* we read all sinval msgs for lock */
411413
intnumLockOwners;/* # of relevant ResourceOwners */
412414
intmaxLockOwners;/* allocated size of array */
413-
boolholdsStrongLockCount;/* bumped FastPathStrongRelationLocks */
414415
LOCALLOCKOWNER*lockOwners;/* dynamically resizable array */
415416
}LOCALLOCK;
416417

@@ -472,7 +473,8 @@ typedef enum
472473
{
473474
LOCKACQUIRE_NOT_AVAIL,/* lock not available, and dontWait=true */
474475
LOCKACQUIRE_OK,/* lock successfully acquired */
475-
LOCKACQUIRE_ALREADY_HELD/* incremented count for lock already held */
476+
LOCKACQUIRE_ALREADY_HELD,/* incremented count for lock already held */
477+
LOCKACQUIRE_ALREADY_CLEAR/* incremented count for lock already clear */
476478
}LockAcquireResult;
477479

478480
/* Deadlock states identified by DeadLockCheck() */
@@ -528,8 +530,10 @@ extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
528530
LOCKMODElockmode,
529531
boolsessionLock,
530532
booldontWait,
531-
boolreport_memory_error);
533+
boolreportMemoryError,
534+
LOCALLOCK**locallockp);
532535
externvoidAbortStrongLockAcquire(void);
536+
externvoidMarkLockClear(LOCALLOCK*locallock);
533537
externboolLockRelease(constLOCKTAG*locktag,
534538
LOCKMODElockmode,boolsessionLock);
535539
externvoidLockReleaseAll(LOCKMETHODIDlockmethodid,boolallLocks);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp