Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9b38d46

Browse files
committed
Make group commit more effective.
When a backend needs to flush the WAL, and someone else is already flushing the WAL, wait until it releases the WALInsertLock and check if we still need to do the flush or if the other backend already did the work for us, before acquiring WALInsertLock. This helps group commit, because when the WAL flush finishes, all the backends that were waiting for it can be woken up in one go, and they can all concurrently observe that they're done, rather than waking them up one by one in a cascading fashion.

This is based on a new LWLock function, LWLockWaitUntilFree(), which has peculiar semantics. If the lock is immediately free, it grabs the lock and returns true. If it's not free, it waits until it is released, but then returns false without grabbing the lock. This is used in XLogFlush(), so that when the lock is acquired, the backend flushes the WAL, but if it's not, the backend first checks the current flush location before retrying.

Original patch and benchmarking by Peter Geoghegan and Simon Riggs, although this patch as committed ended up being very different from that.
1 parent ba1868b commit 9b38d46

File tree

7 files changed

+200
-18
lines changed

7 files changed

+200
-18
lines changed

‎src/backend/access/transam/twophase.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
327327
proc->databaseId=databaseid;
328328
proc->roleId=owner;
329329
proc->lwWaiting= false;
330-
proc->lwExclusive=false;
330+
proc->lwWaitMode=0;
331331
proc->lwWaitLink=NULL;
332332
proc->waitLock=NULL;
333333
proc->waitProcLock=NULL;

‎src/backend/access/transam/xlog.c

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2118,23 +2118,43 @@ XLogFlush(XLogRecPtr record)
21182118
/* initialize to given target; may increase below */
21192119
WriteRqstPtr=record;
21202120

2121-
/* read LogwrtResult and update local state */
2121+
/*
2122+
* Now wait until we get the write lock, or someone else does the
2123+
* flush for us.
2124+
*/
2125+
for (;;)
21222126
{
21232127
/* use volatile pointer to prevent code rearrangement */
21242128
volatileXLogCtlData*xlogctl=XLogCtl;
21252129

2130+
/* read LogwrtResult and update local state */
21262131
SpinLockAcquire(&xlogctl->info_lck);
21272132
if (XLByteLT(WriteRqstPtr,xlogctl->LogwrtRqst.Write))
21282133
WriteRqstPtr=xlogctl->LogwrtRqst.Write;
21292134
LogwrtResult=xlogctl->LogwrtResult;
21302135
SpinLockRelease(&xlogctl->info_lck);
2131-
}
21322136

2133-
/* done already? */
2134-
if (!XLByteLE(record,LogwrtResult.Flush))
2135-
{
2136-
/* now wait for the write lock */
2137-
LWLockAcquire(WALWriteLock,LW_EXCLUSIVE);
2137+
/* done already? */
2138+
if (XLByteLE(record,LogwrtResult.Flush))
2139+
break;
2140+
2141+
/*
2142+
* Try to get the write lock. If we can't get it immediately, wait
2143+
* until it's released, and recheck if we still need to do the flush
2144+
* or if the backend that held the lock did it for us already. This
2145+
* helps to maintain a good rate of group committing when the system
2146+
* is bottlenecked by the speed of fsyncing.
2147+
*/
2148+
if (!LWLockWaitUntilFree(WALWriteLock,LW_EXCLUSIVE))
2149+
{
2150+
/*
2151+
* The lock is now free, but we didn't acquire it yet. Before we
2152+
* do, loop back to check if someone else flushed the record for
2153+
* us already.
2154+
*/
2155+
continue;
2156+
}
2157+
/* Got the lock */
21382158
LogwrtResult=XLogCtl->Write.LogwrtResult;
21392159
if (!XLByteLE(record,LogwrtResult.Flush))
21402160
{
@@ -2163,6 +2183,8 @@ XLogFlush(XLogRecPtr record)
21632183
XLogWrite(WriteRqst, false, false);
21642184
}
21652185
LWLockRelease(WALWriteLock);
2186+
/* done */
2187+
break;
21662188
}
21672189

21682190
END_CRIT_SECTION();

‎src/backend/storage/lmgr/lwlock.c

Lines changed: 160 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
430430
elog(PANIC,"cannot wait without a PGPROC structure");
431431

432432
proc->lwWaiting= true;
433-
proc->lwExclusive=(mode==LW_EXCLUSIVE);
433+
proc->lwWaitMode=mode;
434434
proc->lwWaitLink=NULL;
435435
if (lock->head==NULL)
436436
lock->head=proc;
@@ -564,6 +564,144 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
564564
return !mustwait;
565565
}
566566

567+
/*
568+
* LWLockWaitUntilFree - Wait until a lock is free
569+
*
570+
* The semantics of this function are a bit funky. If the lock is currently
571+
* free, it is acquired in the given mode, and the function returns true. If
572+
* the lock isn't immediately free, the function waits until it is released
573+
* and returns false, but does not acquire the lock.
574+
*
575+
* This is currently used for WALWriteLock: when a backend flushes the WAL,
576+
* holding WALWriteLock, it can flush the commit records of many other
577+
* backends as a side-effect. Those other backends need to wait until the
578+
* flush finishes, but don't need to acquire the lock anymore. They can just
579+
* wake up, observe that their records have already been flushed, and return.
580+
*/
581+
bool
582+
LWLockWaitUntilFree(LWLockIdlockid,LWLockModemode)
583+
{
584+
volatileLWLock*lock=&(LWLockArray[lockid].lock);
585+
PGPROC*proc=MyProc;
586+
boolmustwait;
587+
intextraWaits=0;
588+
589+
PRINT_LWDEBUG("LWLockWaitUntilFree",lockid,lock);
590+
591+
/* Ensure we will have room to remember the lock */
592+
if (num_held_lwlocks >=MAX_SIMUL_LWLOCKS)
593+
elog(ERROR,"too many LWLocks taken");
594+
595+
/*
596+
* Lock out cancel/die interrupts until we exit the code section protected
597+
* by the LWLock. This ensures that interrupts will not interfere with
598+
* manipulations of data structures in shared memory.
599+
*/
600+
HOLD_INTERRUPTS();
601+
602+
/* Acquire mutex. Time spent holding mutex should be short! */
603+
SpinLockAcquire(&lock->mutex);
604+
605+
/* If I can get the lock, do so quickly. */
606+
if (mode==LW_EXCLUSIVE)
607+
{
608+
if (lock->exclusive==0&&lock->shared==0)
609+
{
610+
lock->exclusive++;
611+
mustwait= false;
612+
}
613+
else
614+
mustwait= true;
615+
}
616+
else
617+
{
618+
if (lock->exclusive==0)
619+
{
620+
lock->shared++;
621+
mustwait= false;
622+
}
623+
else
624+
mustwait= true;
625+
}
626+
627+
if (mustwait)
628+
{
629+
/*
630+
* Add myself to wait queue.
631+
*
632+
* If we don't have a PGPROC structure, there's no way to wait. This
633+
* should never occur, since MyProc should only be null during shared
634+
* memory initialization.
635+
*/
636+
if (proc==NULL)
637+
elog(PANIC,"cannot wait without a PGPROC structure");
638+
639+
proc->lwWaiting= true;
640+
proc->lwWaitMode=LW_WAIT_UNTIL_FREE;
641+
proc->lwWaitLink=NULL;
642+
if (lock->head==NULL)
643+
lock->head=proc;
644+
else
645+
lock->tail->lwWaitLink=proc;
646+
lock->tail=proc;
647+
648+
/* Can release the mutex now */
649+
SpinLockRelease(&lock->mutex);
650+
651+
/*
652+
* Wait until awakened. Like in LWLockAcquire, be prepared for bogus
653+
* wakups, because we share the semaphore with ProcWaitForSignal.
654+
*/
655+
LOG_LWDEBUG("LWLockWaitUntilFree",lockid,"waiting");
656+
657+
#ifdefLWLOCK_STATS
658+
block_counts[lockid]++;
659+
#endif
660+
661+
TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid,mode);
662+
663+
for (;;)
664+
{
665+
/* "false" means cannot accept cancel/die interrupt here. */
666+
PGSemaphoreLock(&proc->sem, false);
667+
if (!proc->lwWaiting)
668+
break;
669+
extraWaits++;
670+
}
671+
672+
TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid,mode);
673+
674+
LOG_LWDEBUG("LWLockWaitUntilFree",lockid,"awakened");
675+
}
676+
else
677+
{
678+
/* We are done updating shared state of the lock itself. */
679+
SpinLockRelease(&lock->mutex);
680+
}
681+
682+
/*
683+
* Fix the process wait semaphore's count for any absorbed wakeups.
684+
*/
685+
while (extraWaits-->0)
686+
PGSemaphoreUnlock(&proc->sem);
687+
688+
if (mustwait)
689+
{
690+
/* Failed to get lock, so release interrupt holdoff */
691+
RESUME_INTERRUPTS();
692+
LOG_LWDEBUG("LWLockWaitUntilFree",lockid,"failed");
693+
TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE_FAIL(lockid,mode);
694+
}
695+
else
696+
{
697+
/* Add lock to list of locks held by this backend */
698+
held_lwlocks[num_held_lwlocks++]=lockid;
699+
TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(lockid,mode);
700+
}
701+
702+
return !mustwait;
703+
}
704+
567705
/*
568706
* LWLockRelease - release a previously acquired lock
569707
*/
@@ -618,20 +756,36 @@ LWLockRelease(LWLockId lockid)
618756
/*
619757
* Remove the to-be-awakened PGPROCs from the queue. If the front
620758
* waiter wants exclusive lock, awaken him only. Otherwise awaken
621-
* as many waiters as want shared access.
759+
* as many waiters as want shared access (or just want to be
760+
* woken up when the lock becomes free without acquiring it,
761+
* ie. LWLockWaitUntilFree).
622762
*/
763+
boolreleaseOK= true;
764+
623765
proc=head;
624-
if (!proc->lwExclusive)
766+
if (proc->lwWaitMode!=LW_EXCLUSIVE)
625767
{
626768
while (proc->lwWaitLink!=NULL&&
627-
!proc->lwWaitLink->lwExclusive)
769+
proc->lwWaitLink->lwWaitMode!=LW_EXCLUSIVE)
770+
{
628771
proc=proc->lwWaitLink;
772+
if (proc->lwWaitMode!=LW_WAIT_UNTIL_FREE)
773+
releaseOK= false;
774+
}
629775
}
630776
/* proc is now the last PGPROC to be released */
631777
lock->head=proc->lwWaitLink;
632778
proc->lwWaitLink=NULL;
633-
/* prevent additional wakeups until retryer gets to run */
634-
lock->releaseOK= false;
779+
/*
780+
* Prevent additional wakeups until retryer gets to run. Backends
781+
* that are just waiting for the lock to become free don't prevent
782+
* wakeups, because they might decide that they don't want the
783+
* lock, after all.
784+
*/
785+
if (proc->lwWaitMode!=LW_WAIT_UNTIL_FREE)
786+
releaseOK= false;
787+
788+
lock->releaseOK=releaseOK;
635789
}
636790
else
637791
{

‎src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ InitProcess(void)
362362
if (IsAutoVacuumWorkerProcess())
363363
MyPgXact->vacuumFlags |=PROC_IS_AUTOVACUUM;
364364
MyProc->lwWaiting= false;
365-
MyProc->lwExclusive=false;
365+
MyProc->lwWaitMode=0;
366366
MyProc->lwWaitLink=NULL;
367367
MyProc->waitLock=NULL;
368368
MyProc->waitProcLock=NULL;
@@ -517,7 +517,7 @@ InitAuxiliaryProcess(void)
517517
MyPgXact->inCommit= false;
518518
MyPgXact->vacuumFlags=0;
519519
MyProc->lwWaiting= false;
520-
MyProc->lwExclusive=false;
520+
MyProc->lwWaitMode=0;
521521
MyProc->lwWaitLink=NULL;
522522
MyProc->waitLock=NULL;
523523
MyProc->waitProcLock=NULL;

‎src/backend/utils/probes.d

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ provider postgresql {
3535
probelwlock__wait__done(LWLockId,LWLockMode);
3636
probelwlock__condacquire(LWLockId,LWLockMode);
3737
probelwlock__condacquire__fail(LWLockId,LWLockMode);
38+
probelwlock__wait__until__free(LWLockId,LWLockMode);
39+
probelwlock__wait__until__free__fail(LWLockId,LWLockMode);
3840

3941
probelock__wait__start(unsignedint,unsignedint,unsignedint,unsignedint,unsignedint,LOCKMODE);
4042
probelock__wait__done(unsignedint,unsignedint,unsignedint,unsignedint,unsignedint,LOCKMODE);

‎src/include/storage/lwlock.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,10 @@ typedef enum LWLockId
9494
typedefenumLWLockMode
9595
{
9696
LW_EXCLUSIVE,
97-
LW_SHARED
97+
LW_SHARED,
98+
LW_WAIT_UNTIL_FREE/* A special mode used in PGPROC->lwlockMode, when
99+
* waiting for lock to become free. Not to be used
100+
* as LWLockAcquire argument */
98101
}LWLockMode;
99102

100103

@@ -105,6 +108,7 @@ extern bool Trace_lwlocks;
105108
externLWLockIdLWLockAssign(void);
106109
externvoidLWLockAcquire(LWLockIdlockid,LWLockModemode);
107110
externboolLWLockConditionalAcquire(LWLockIdlockid,LWLockModemode);
111+
externboolLWLockWaitUntilFree(LWLockIdlockid,LWLockModemode);
108112
externvoidLWLockRelease(LWLockIdlockid);
109113
externvoidLWLockReleaseAll(void);
110114
externboolLWLockHeldByMe(LWLockIdlockid);

‎src/include/storage/proc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ struct PGPROC
101101

102102
/* Info about LWLock the process is currently waiting for, if any. */
103103
boollwWaiting;/* true if waiting for an LW lock */
104-
boollwExclusive;/*true if waiting for exclusive access */
104+
uint8lwWaitMode;/*lwlock mode being waited for */
105105
structPGPROC*lwWaitLink;/* next waiter for same LW lock */
106106

107107
/* Info about lock the process is currently waiting for, if any. */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp