Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ccce90b

Browse files
committed
Use group updates when setting transaction status in clog.
Commit 0e141c0 introduced a mechanism to reduce contention on ProcArrayLock by having a single process clear XIDs in the procArray on behalf of multiple processes, reducing the need to hand the lock around. Use a similar mechanism to reduce contention on CLogControlLock. Testing shows that this very significantly reduces the amount of time waiting for CLogControlLock on high-concurrency pgbench tests run on large multi-socket machines; whether that translates into a TPS improvement depends on how much of that contention is simply shifted to some other lock, particularly WALWriteLock.

Amit Kapila, with some cosmetic changes by me. Extensively reviewed, tested, and benchmarked over a period of about 15 months by Simon Riggs, Robert Haas, Andres Freund, Jesper Pedersen, and especially by Tomas Vondra and Dilip Kumar.

Discussion: http://postgr.es/m/CAA4eK1L_snxM_JcrzEstNq9P66++F4kKFce=1r5+D1vzPofdtg@mail.gmail.com
Discussion: http://postgr.es/m/CAA4eK1LyR2A+m=RBSZ6rcPEwJ=rVi1ADPSndXHZdjn56yqO6Vg@mail.gmail.com
Discussion: http://postgr.es/m/91d57161-d3ea-0cc2-6066-80713e4f90d7@2ndquadrant.com
1 parent f077e1b commit ccce90b

File tree

5 files changed

+268
-9
lines changed

5 files changed

+268
-9
lines changed

‎src/backend/access/transam/clog.c

Lines changed: 231 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,13 @@
3535
#include"access/clog.h"
3636
#include"access/slru.h"
3737
#include"access/transam.h"
38+
#include"access/twophase.h"
3839
#include"access/xlog.h"
3940
#include"access/xloginsert.h"
4041
#include"access/xlogutils.h"
4142
#include"miscadmin.h"
4243
#include"pg_trace.h"
44+
#include"storage/proc.h"
4345

4446
/*
4547
* Defines for CLOG page sizes. A page is the same BLCKSZ as is used
@@ -86,11 +88,17 @@ static void WriteZeroPageXlogRec(int pageno);
8688
staticvoidWriteTruncateXlogRec(intpageno);
8789
staticvoidTransactionIdSetPageStatus(TransactionIdxid,intnsubxids,
8890
TransactionId*subxids,XidStatusstatus,
89-
XLogRecPtrlsn,intpageno);
91+
XLogRecPtrlsn,intpageno,
92+
boolall_xact_same_page);
9093
staticvoidTransactionIdSetStatusBit(TransactionIdxid,XidStatusstatus,
9194
XLogRecPtrlsn,intslotno);
9295
staticvoidset_status_by_pages(intnsubxids,TransactionId*subxids,
9396
XidStatusstatus,XLogRecPtrlsn);
97+
staticboolTransactionGroupUpdateXidStatus(TransactionIdxid,XidStatusstatus,
98+
XLogRecPtrlsn,intpageno);
99+
staticvoidTransactionIdSetPageStatusInternal(TransactionIdxid,intnsubxids,
100+
TransactionId*subxids,XidStatusstatus,
101+
XLogRecPtrlsn,intpageno);
94102

95103

96104
/*
@@ -173,7 +181,7 @@ TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
173181
* Set the parent and all subtransactions in a single call
174182
*/
175183
TransactionIdSetPageStatus(xid,nsubxids,subxids,status,lsn,
176-
pageno);
184+
pageno, true);
177185
}
178186
else
179187
{
@@ -200,7 +208,7 @@ TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
200208
*/
201209
pageno=TransactionIdToPage(xid);
202210
TransactionIdSetPageStatus(xid,nsubxids_on_first_page,subxids,status,
203-
lsn,pageno);
211+
lsn,pageno, false);
204212

205213
/*
206214
* Now work through the rest of the subxids one clog page at a time,
@@ -238,7 +246,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids,
238246

239247
TransactionIdSetPageStatus(InvalidTransactionId,
240248
num_on_page,subxids+offset,
241-
status,lsn,pageno);
249+
status,lsn,pageno, false);
242250
offset=i;
243251
pageno=TransactionIdToPage(subxids[offset]);
244252
}
@@ -248,21 +256,78 @@ set_status_by_pages(int nsubxids, TransactionId *subxids,
248256
* Record the final state of transaction entries in the commit log for
249257
* all entries on a single page. Atomic only on this page.
250258
*
251-
* Otherwise API is same as TransactionIdSetTreeStatus()
259+
* When there is contention on CLogControlLock, we try to group multiple
260+
* updates; a single leader process will perform transaction status updates
261+
* for multiple backends so that the number of times CLogControlLock needs
262+
* to be acquired is reduced. We don't try to do this if a process has
263+
* overflowed the subxids array in its PGPROC, since in that case we
264+
* don't have a complete list of XIDs for it. We also skip it if a process
265+
* has XIDs on more than one CLOG page, or on a different CLOG page than
266+
* processes already waiting for a group update. This latter condition
267+
* has a race condition (see TransactionGroupUpdateXidStatus) but the
268+
* worst thing that happens if we mess up is a small loss of efficiency;
269+
* the intent is to avoid having the leader access pages it wouldn't
270+
* otherwise need to touch. Finally, we skip it for prepared transactions,
271+
* which don't have the semaphore we would need for this optimization,
272+
* and which are anyway probably not all that common.
252273
*/
253274
staticvoid
254275
TransactionIdSetPageStatus(TransactionIdxid,intnsubxids,
255276
TransactionId*subxids,XidStatusstatus,
256-
XLogRecPtrlsn,intpageno)
277+
XLogRecPtrlsn,intpageno,
278+
boolall_xact_same_page)
279+
{
280+
if (all_xact_same_page&&
281+
nsubxids<PGPROC_MAX_CACHED_SUBXIDS&&
282+
!IsGXactActive())
283+
{
284+
/*
285+
* If we can immediately acquire CLogControlLock, we update the status
286+
* of our own XID and release the lock. If not, try use group XID
287+
* update. If that doesn't work out, fall back to waiting for the
288+
* lock to perform an update for this transaction only.
289+
*/
290+
if (LWLockConditionalAcquire(CLogControlLock,LW_EXCLUSIVE))
291+
{
292+
TransactionIdSetPageStatusInternal(xid,nsubxids,subxids,status,lsn,pageno);
293+
LWLockRelease(CLogControlLock);
294+
}
295+
elseif (!TransactionGroupUpdateXidStatus(xid,status,lsn,pageno))
296+
{
297+
LWLockAcquire(CLogControlLock,LW_EXCLUSIVE);
298+
299+
TransactionIdSetPageStatusInternal(xid,nsubxids,subxids,status,lsn,pageno);
300+
301+
LWLockRelease(CLogControlLock);
302+
}
303+
}
304+
else
305+
{
306+
LWLockAcquire(CLogControlLock,LW_EXCLUSIVE);
307+
308+
TransactionIdSetPageStatusInternal(xid,nsubxids,subxids,status,lsn,pageno);
309+
310+
LWLockRelease(CLogControlLock);
311+
}
312+
}
313+
314+
/*
315+
* Record the final state of transaction entry in the commit log
316+
*
317+
* We don't do any locking here; caller must handle that.
318+
*/
319+
staticvoid
320+
TransactionIdSetPageStatusInternal(TransactionIdxid,intnsubxids,
321+
TransactionId*subxids,XidStatusstatus,
322+
XLogRecPtrlsn,intpageno)
257323
{
258324
intslotno;
259325
inti;
260326

261327
Assert(status==TRANSACTION_STATUS_COMMITTED||
262328
status==TRANSACTION_STATUS_ABORTED||
263329
(status==TRANSACTION_STATUS_SUB_COMMITTED&& !TransactionIdIsValid(xid)));
264-
265-
LWLockAcquire(CLogControlLock,LW_EXCLUSIVE);
330+
Assert(LWLockHeldByMeInMode(CLogControlLock,LW_EXCLUSIVE));
266331

267332
/*
268333
* If we're doing an async commit (ie, lsn is valid), then we must wait
@@ -310,8 +375,166 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
310375
}
311376

312377
ClogCtl->shared->page_dirty[slotno]= true;
378+
}
313379

380+
/*
381+
* When we cannot immediately acquire CLogControlLock in exclusive mode at
382+
* commit time, add ourselves to a list of processes that need their XIDs
383+
* status update. The first process to add itself to the list will acquire
384+
* CLogControlLock in exclusive mode and set transaction status as required
385+
* on behalf of all group members. This avoids a great deal of contention
386+
* around CLogControlLock when many processes are trying to commit at once,
387+
* since the lock need not be repeatedly handed off from one committing
388+
* process to the next.
389+
*
390+
* Returns true when transaction status has been updated in clog; returns
391+
* false if we decided against applying the optimization because the page
392+
* number we need to update differs from those processes already waiting.
393+
*/
394+
staticbool
395+
TransactionGroupUpdateXidStatus(TransactionIdxid,XidStatusstatus,
396+
XLogRecPtrlsn,intpageno)
397+
{
398+
volatilePROC_HDR*procglobal=ProcGlobal;
399+
PGPROC*proc=MyProc;
400+
uint32nextidx;
401+
uint32wakeidx;
402+
403+
/* We should definitely have an XID whose status needs to be updated. */
404+
Assert(TransactionIdIsValid(xid));
405+
406+
/*
407+
* Add ourselves to the list of processes needing a group XID status
408+
* update.
409+
*/
410+
proc->clogGroupMember= true;
411+
proc->clogGroupMemberXid=xid;
412+
proc->clogGroupMemberXidStatus=status;
413+
proc->clogGroupMemberPage=pageno;
414+
proc->clogGroupMemberLsn=lsn;
415+
416+
nextidx=pg_atomic_read_u32(&procglobal->clogGroupFirst);
417+
418+
while (true)
419+
{
420+
/*
421+
* Add the proc to list, if the clog page where we need to update the
422+
* current transaction status is same as group leader's clog page.
423+
*
424+
* There is a race condition here, which is that after doing the below
425+
* check and before adding this proc's clog update to a group, the
426+
* group leader might have already finished the group update for this
427+
* page and becomes group leader of another group. This will lead to a
428+
* situation where a single group can have different clog page
429+
* updates. This isn't likely and will still work, just maybe a bit
430+
* less efficiently.
431+
*/
432+
if (nextidx!=INVALID_PGPROCNO&&
433+
ProcGlobal->allProcs[nextidx].clogGroupMemberPage!=proc->clogGroupMemberPage)
434+
{
435+
proc->clogGroupMember= false;
436+
return false;
437+
}
438+
439+
pg_atomic_write_u32(&proc->clogGroupNext,nextidx);
440+
441+
if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst,
442+
&nextidx,
443+
(uint32)proc->pgprocno))
444+
break;
445+
}
446+
447+
/*
448+
* If the list was not empty, the leader will update the status of our
449+
* XID. It is impossible to have followers without a leader because the
450+
* first process that has added itself to the list will always have
451+
* nextidx as INVALID_PGPROCNO.
452+
*/
453+
if (nextidx!=INVALID_PGPROCNO)
454+
{
455+
intextraWaits=0;
456+
457+
/* Sleep until the leader updates our XID status. */
458+
for (;;)
459+
{
460+
/* acts as a read barrier */
461+
PGSemaphoreLock(proc->sem);
462+
if (!proc->clogGroupMember)
463+
break;
464+
extraWaits++;
465+
}
466+
467+
Assert(pg_atomic_read_u32(&proc->clogGroupNext)==INVALID_PGPROCNO);
468+
469+
/* Fix semaphore count for any absorbed wakeups */
470+
while (extraWaits-->0)
471+
PGSemaphoreUnlock(proc->sem);
472+
return true;
473+
}
474+
475+
/* We are the leader. Acquire the lock on behalf of everyone. */
476+
LWLockAcquire(CLogControlLock,LW_EXCLUSIVE);
477+
478+
/*
479+
* Now that we've got the lock, clear the list of processes waiting for
480+
* group XID status update, saving a pointer to the head of the list.
481+
* Trying to pop elements one at a time could lead to an ABA problem.
482+
*/
483+
nextidx=pg_atomic_exchange_u32(&procglobal->clogGroupFirst,INVALID_PGPROCNO);
484+
485+
/* Remember head of list so we can perform wakeups after dropping lock. */
486+
wakeidx=nextidx;
487+
488+
/* Walk the list and update the status of all XIDs. */
489+
while (nextidx!=INVALID_PGPROCNO)
490+
{
491+
PGPROC*proc=&ProcGlobal->allProcs[nextidx];
492+
PGXACT*pgxact=&ProcGlobal->allPgXact[nextidx];
493+
494+
/*
495+
* Overflowed transactions should not use group XID status update
496+
* mechanism.
497+
*/
498+
Assert(!pgxact->overflowed);
499+
500+
TransactionIdSetPageStatusInternal(proc->clogGroupMemberXid,
501+
pgxact->nxids,
502+
proc->subxids.xids,
503+
proc->clogGroupMemberXidStatus,
504+
proc->clogGroupMemberLsn,
505+
proc->clogGroupMemberPage);
506+
507+
/* Move to next proc in list. */
508+
nextidx=pg_atomic_read_u32(&proc->clogGroupNext);
509+
}
510+
511+
/* We're done with the lock now. */
314512
LWLockRelease(CLogControlLock);
513+
514+
/*
515+
* Now that we've released the lock, go back and wake everybody up. We
516+
* don't do this under the lock so as to keep lock hold times to a
517+
* minimum. The system calls we need to perform to wake other processes
518+
* up are probably slower and can cause performance slowdown if done under
519+
* lock.
520+
*/
521+
while (wakeidx!=INVALID_PGPROCNO)
522+
{
523+
PGPROC*proc=&ProcGlobal->allProcs[wakeidx];
524+
525+
wakeidx=pg_atomic_read_u32(&proc->clogGroupNext);
526+
pg_atomic_write_u32(&proc->clogGroupNext,INVALID_PGPROCNO);
527+
528+
/* ensure all previous writes are visible before follower continues. */
529+
pg_write_barrier();
530+
531+
proc->clogGroupMember= false;
532+
533+
if (proc!=MyProc)
534+
PGSemaphoreUnlock(proc->sem);
535+
}
536+
537+
return true;
315538
}
316539

317540
/*

‎src/backend/access/transam/twophase.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static TwoPhaseStateData *TwoPhaseState;
176176
/*
177177
* Global transaction entry currently locked by us, if any.
178178
*/
179-
staticGlobalTransactionMyLockedGxact=NULL;
179+
GlobalTransactionMyLockedGxact=NULL;
180180

181181
staticbooltwophaseExitRegistered= false;
182182

‎src/backend/storage/lmgr/proc.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ InitProcGlobal(void)
186186
ProcGlobal->walwriterLatch=NULL;
187187
ProcGlobal->checkpointerLatch=NULL;
188188
pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst,INVALID_PGPROCNO);
189+
pg_atomic_init_u32(&ProcGlobal->clogGroupFirst,INVALID_PGPROCNO);
189190

190191
/*
191192
* Create and initialize all the PGPROC structures we'll need. There are
@@ -408,6 +409,14 @@ InitProcess(void)
408409
/* Initialize wait event information. */
409410
MyProc->wait_event_info=0;
410411

412+
/* Initialize fields for group transaction status update. */
413+
MyProc->clogGroupMember= false;
414+
MyProc->clogGroupMemberXid=InvalidTransactionId;
415+
MyProc->clogGroupMemberXidStatus=TRANSACTION_STATUS_IN_PROGRESS;
416+
MyProc->clogGroupMemberPage=-1;
417+
MyProc->clogGroupMemberLsn=InvalidXLogRecPtr;
418+
pg_atomic_init_u32(&MyProc->clogGroupNext,INVALID_PGPROCNO);
419+
411420
/*
412421
* Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
413422
* on it. That allows us to repoint the process latch, which so far

‎src/include/access/twophase.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
*/
2525
typedefstructGlobalTransactionData*GlobalTransaction;
2626

27+
externGlobalTransactionMyLockedGxact;
28+
2729
/* GUC variable */
2830
externintmax_prepared_xacts;
2931

@@ -36,6 +38,17 @@ extern void PostPrepare_Twophase(void);
3638
externPGPROC*TwoPhaseGetDummyProc(TransactionIdxid);
3739
externBackendIdTwoPhaseGetDummyBackendId(TransactionIdxid);
3840

41+
/*
42+
* IsGXactActive
43+
*Return true if there is a Global transaction entry currently
44+
*locked by us.
45+
*/
46+
staticinlinebool
47+
IsGXactActive(void)
48+
{
49+
returnMyLockedGxact ? true : false;
50+
}
51+
3952
externGlobalTransactionMarkAsPreparing(TransactionIdxid,constchar*gid,
4053
TimestampTzprepared_at,
4154
Oidowner,Oiddatabaseid);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp