Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit5e89985

Browse files
committed
bufmgr: Don't lock buffer header in StrategyGetBuffer()
Previously StrategyGetBuffer() acquired the buffer header spinlock for everybuffer, whether it was reusable or not. If reusable, it'd be returned, withthe lock held, to GetVictimBuffer(), which then would pin the buffer withPinBuffer_Locked(). That's somewhat violating the spirit of the guidelines forholding spinlocks (i.e. that they are only held for a few lines of consecutivecode) and necessitates using PinBuffer_Locked(), which scales worse thanPinBuffer() due to holding the spinlock. This alone makes it worth changingthe code.However, the main reason to change this is that a future commit will makePinBuffer_Locked() slower (due to making UnlockBufHdr() slower), to gainscalability for the much more common case of pinning a pre-existing buffer. Bypinning the buffer with a single atomic operation, iff the buffer is reusable,we avoid any potential regression for miss-heavy workloads. There strictly arefewer atomic operations for each potential buffer after this change.The price for this improvement is that freelist.c needs two CAS loops andneeds to be able to set up the resource accounting for pinned buffers. Thelatter is achieved by exposing a new function for that purpose from bufmgr.c,that seems better than exposing the entire private refcount infrastructure.The improvement seems worth the complexity.Reviewed-by: Robert Haas <robertmhaas@gmail.com>Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com>Discussion:https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
1 parent3baae90 commit5e89985

File tree

3 files changed

+132
-66
lines changed

3 files changed

+132
-66
lines changed

‎src/backend/storage/buffer/bufmgr.c‎

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,6 @@ static void PinBuffer_Locked(BufferDesc *buf);
518518
staticvoidUnpinBuffer(BufferDesc*buf);
519519
staticvoidUnpinBufferNoOwner(BufferDesc*buf);
520520
staticvoidBufferSync(intflags);
521-
staticuint32WaitBufHdrUnlocked(BufferDesc*buf);
522521
staticintSyncOneBuffer(intbuf_id,boolskip_recently_used,
523522
WritebackContext*wb_context);
524523
staticvoidWaitIO(BufferDesc*buf);
@@ -2326,8 +2325,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
23262325
boolfrom_ring;
23272326

23282327
/*
2329-
* Ensure,while the spinlock's not yet held, that there's a free refcount
2330-
* entry, and a resource owner slot for the pin.
2328+
* Ensure,before we pin a victim buffer, that there's a free refcount
2329+
* entry and resource owner slot for the pin.
23312330
*/
23322331
ReservePrivateRefCountEntry();
23332332
ResourceOwnerEnlarge(CurrentResourceOwner);
@@ -2336,17 +2335,12 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
23362335
again:
23372336

23382337
/*
2339-
* Select a victim buffer. The buffer is returnedwith its header
2340-
*spinlock still held!
2338+
* Select a victim buffer. The buffer is returnedpinned and owned by
2339+
*this backend.
23412340
*/
23422341
buf_hdr=StrategyGetBuffer(strategy,&buf_state,&from_ring);
23432342
buf=BufferDescriptorGetBuffer(buf_hdr);
23442343

2345-
Assert(BUF_STATE_GET_REFCOUNT(buf_state)==0);
2346-
2347-
/* Pin the buffer and then release the buffer spinlock */
2348-
PinBuffer_Locked(buf_hdr);
2349-
23502344
/*
23512345
* We shouldn't have any other pins for this buffer.
23522346
*/
@@ -3118,7 +3112,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
31183112
{
31193113
result= (buf_state&BM_VALID)!=0;
31203114

3121-
ref=NewPrivateRefCountEntry(b);
3115+
TrackNewBufferPin(b);
31223116

31233117
/*
31243118
* Assume that we acquired a buffer pin for the purposes of
@@ -3150,11 +3144,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
31503144
* cannot meddle with that.
31513145
*/
31523146
result= (pg_atomic_read_u32(&buf->state)&BM_VALID)!=0;
3147+
3148+
Assert(ref->refcount>0);
3149+
ref->refcount++;
3150+
ResourceOwnerRememberBuffer(CurrentResourceOwner,b);
31533151
}
31543152

3155-
ref->refcount++;
3156-
Assert(ref->refcount>0);
3157-
ResourceOwnerRememberBuffer(CurrentResourceOwner,b);
31583153
returnresult;
31593154
}
31603155

@@ -3183,8 +3178,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
31833178
staticvoid
31843179
PinBuffer_Locked(BufferDesc*buf)
31853180
{
3186-
Bufferb;
3187-
PrivateRefCountEntry*ref;
31883181
uint32buf_state;
31893182

31903183
/*
@@ -3209,12 +3202,7 @@ PinBuffer_Locked(BufferDesc *buf)
32093202
buf_state+=BUF_REFCOUNT_ONE;
32103203
UnlockBufHdr(buf,buf_state);
32113204

3212-
b=BufferDescriptorGetBuffer(buf);
3213-
3214-
ref=NewPrivateRefCountEntry(b);
3215-
ref->refcount++;
3216-
3217-
ResourceOwnerRememberBuffer(CurrentResourceOwner,b);
3205+
TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
32183206
}
32193207

32203208
/*
@@ -3332,6 +3320,17 @@ UnpinBufferNoOwner(BufferDesc *buf)
33323320
}
33333321
}
33343322

3323+
inlinevoid
3324+
TrackNewBufferPin(Bufferbuf)
3325+
{
3326+
PrivateRefCountEntry*ref;
3327+
3328+
ref=NewPrivateRefCountEntry(buf);
3329+
ref->refcount++;
3330+
3331+
ResourceOwnerRememberBuffer(CurrentResourceOwner,buf);
3332+
}
3333+
33353334
#defineST_SORT sort_checkpoint_bufferids
33363335
#defineST_ELEMENT_TYPE CkptSortItem
33373336
#defineST_COMPARE(a,b) ckpt_buforder_comparator(a, b)
@@ -6288,7 +6287,7 @@ LockBufHdr(BufferDesc *desc)
62886287
* Obviously the buffer could be locked by the time the value is returned, so
62896288
* this is primarily useful in CAS style loops.
62906289
*/
6291-
staticuint32
6290+
pg_noinlineuint32
62926291
WaitBufHdrUnlocked(BufferDesc*buf)
62936292
{
62946293
SpinDelayStatusdelayStatus;

‎src/backend/storage/buffer/freelist.c‎

Lines changed: 106 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -159,21 +159,23 @@ ClockSweepTick(void)
159159
* StrategyGetBuffer
160160
*
161161
*Called by the bufmgr to get the next candidate buffer to use in
162-
*BufferAlloc(). The only hard requirementBufferAlloc() has is that
162+
*GetVictimBuffer(). The only hard requirementGetVictimBuffer() has is that
163163
*the selected buffer must not currently be pinned by anyone.
164164
*
165165
*strategy is a BufferAccessStrategy object, or NULL for default strategy.
166166
*
167-
*To ensure that no one else can pin the buffer before we do, we must
168-
*return the buffer with the buffer header spinlock still held.
167+
*It is the callers responsibility to ensure the buffer ownership can be
168+
*tracked via TrackNewBufferPin().
169+
*
170+
*The buffer is pinned and marked as owned, using TrackNewBufferPin(),
171+
*before returning.
169172
*/
170173
BufferDesc*
171174
StrategyGetBuffer(BufferAccessStrategystrategy,uint32*buf_state,bool*from_ring)
172175
{
173176
BufferDesc*buf;
174177
intbgwprocno;
175178
inttrycounter;
176-
uint32local_buf_state;/* to avoid repeated (de-)referencing */
177179

178180
*from_ring= false;
179181

@@ -228,44 +230,79 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
228230
trycounter=NBuffers;
229231
for (;;)
230232
{
233+
uint32old_buf_state;
234+
uint32local_buf_state;
235+
231236
buf=GetBufferDescriptor(ClockSweepTick());
232237

233238
/*
234-
*Ifthe bufferis pinned or has a nonzero usage_count, we cannot use
235-
*it; decrement the usage_count (unless pinned) and keep scanning.
239+
*Check whetherthe buffercan be used and pin it if so. Do this
240+
*using a CAS loop, to avoid having to lock the buffer header.
236241
*/
237-
local_buf_state=LockBufHdr(buf);
238-
239-
if (BUF_STATE_GET_REFCOUNT(local_buf_state)==0)
242+
old_buf_state=pg_atomic_read_u32(&buf->state);
243+
for (;;)
240244
{
245+
local_buf_state=old_buf_state;
246+
247+
/*
248+
* If the buffer is pinned or has a nonzero usage_count, we cannot
249+
* use it; decrement the usage_count (unless pinned) and keep
250+
* scanning.
251+
*/
252+
253+
if (BUF_STATE_GET_REFCOUNT(local_buf_state)!=0)
254+
{
255+
if (--trycounter==0)
256+
{
257+
/*
258+
* We've scanned all the buffers without making any state
259+
* changes, so all the buffers are pinned (or were when we
260+
* looked at them). We could hope that someone will free
261+
* one eventually, but it's probably better to fail than
262+
* to risk getting stuck in an infinite loop.
263+
*/
264+
elog(ERROR,"no unpinned buffers available");
265+
}
266+
break;
267+
}
268+
269+
if (unlikely(local_buf_state&BM_LOCKED))
270+
{
271+
old_buf_state=WaitBufHdrUnlocked(buf);
272+
continue;
273+
}
274+
241275
if (BUF_STATE_GET_USAGECOUNT(local_buf_state)!=0)
242276
{
243277
local_buf_state-=BUF_USAGECOUNT_ONE;
244278

245-
trycounter=NBuffers;
279+
if (pg_atomic_compare_exchange_u32(&buf->state,&old_buf_state,
280+
local_buf_state))
281+
{
282+
trycounter=NBuffers;
283+
break;
284+
}
246285
}
247286
else
248287
{
249-
/* Found a usable buffer */
250-
if (strategy!=NULL)
251-
AddBufferToRing(strategy,buf);
252-
*buf_state=local_buf_state;
253-
returnbuf;
288+
/* pin the buffer if the CAS succeeds */
289+
local_buf_state+=BUF_REFCOUNT_ONE;
290+
291+
if (pg_atomic_compare_exchange_u32(&buf->state,&old_buf_state,
292+
local_buf_state))
293+
{
294+
/* Found a usable buffer */
295+
if (strategy!=NULL)
296+
AddBufferToRing(strategy,buf);
297+
*buf_state=local_buf_state;
298+
299+
TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
300+
301+
returnbuf;
302+
}
254303
}
304+
255305
}
256-
elseif (--trycounter==0)
257-
{
258-
/*
259-
* We've scanned all the buffers without making any state changes,
260-
* so all the buffers are pinned (or were when we looked at them).
261-
* We could hope that someone will free one eventually, but it's
262-
* probably better to fail than to risk getting stuck in an
263-
* infinite loop.
264-
*/
265-
UnlockBufHdr(buf,local_buf_state);
266-
elog(ERROR,"no unpinned buffers available");
267-
}
268-
UnlockBufHdr(buf,local_buf_state);
269306
}
270307
}
271308

@@ -614,13 +651,15 @@ FreeAccessStrategy(BufferAccessStrategy strategy)
614651
* GetBufferFromRing -- returns a buffer from the ring, or NULL if the
615652
*ring is empty / not usable.
616653
*
617-
* The bufhdr spin lock is held on the returned buffer.
654+
* The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
655+
* returning.
618656
*/
619657
staticBufferDesc*
620658
GetBufferFromRing(BufferAccessStrategystrategy,uint32*buf_state)
621659
{
622660
BufferDesc*buf;
623661
Bufferbufnum;
662+
uint32old_buf_state;
624663
uint32local_buf_state;/* to avoid repeated (de-)referencing */
625664

626665

@@ -637,24 +676,48 @@ GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
637676
if (bufnum==InvalidBuffer)
638677
returnNULL;
639678

679+
buf=GetBufferDescriptor(bufnum-1);
680+
640681
/*
641-
* If the buffer is pinned we cannot use it under any circumstances.
642-
*
643-
* If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
644-
* since our own previous usage of the ring element would have left it
645-
* there, but it might've been decremented by clock-sweep since then). A
646-
* higher usage_count indicates someone else has touched the buffer, so we
647-
* shouldn't re-use it.
682+
* Check whether the buffer can be used and pin it if so. Do this using a
683+
* CAS loop, to avoid having to lock the buffer header.
648684
*/
649-
buf=GetBufferDescriptor(bufnum-1);
650-
local_buf_state=LockBufHdr(buf);
651-
if (BUF_STATE_GET_REFCOUNT(local_buf_state)==0
652-
&&BUF_STATE_GET_USAGECOUNT(local_buf_state) <=1)
685+
old_buf_state=pg_atomic_read_u32(&buf->state);
686+
for (;;)
653687
{
654-
*buf_state=local_buf_state;
655-
returnbuf;
688+
local_buf_state=old_buf_state;
689+
690+
/*
691+
* If the buffer is pinned we cannot use it under any circumstances.
692+
*
693+
* If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
694+
* since our own previous usage of the ring element would have left it
695+
* there, but it might've been decremented by clock-sweep since then).
696+
* A higher usage_count indicates someone else has touched the buffer,
697+
* so we shouldn't re-use it.
698+
*/
699+
if (BUF_STATE_GET_REFCOUNT(local_buf_state)!=0
700+
||BUF_STATE_GET_USAGECOUNT(local_buf_state)>1)
701+
break;
702+
703+
if (unlikely(local_buf_state&BM_LOCKED))
704+
{
705+
old_buf_state=WaitBufHdrUnlocked(buf);
706+
continue;
707+
}
708+
709+
/* pin the buffer if the CAS succeeds */
710+
local_buf_state+=BUF_REFCOUNT_ONE;
711+
712+
if (pg_atomic_compare_exchange_u32(&buf->state,&old_buf_state,
713+
local_buf_state))
714+
{
715+
*buf_state=local_buf_state;
716+
717+
TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
718+
returnbuf;
719+
}
656720
}
657-
UnlockBufHdr(buf,local_buf_state);
658721

659722
/*
660723
* Tell caller to allocate a new buffer with the normal allocation

‎src/include/storage/buf_internals.h‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,8 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
371371
pg_atomic_write_u32(&desc->state,buf_state& (~BM_LOCKED));
372372
}
373373

374+
externuint32WaitBufHdrUnlocked(BufferDesc*buf);
375+
374376
/* in bufmgr.c */
375377

376378
/*
@@ -425,6 +427,8 @@ extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_co
425427
externvoidScheduleBufferTagForWriteback(WritebackContext*wb_context,
426428
IOContextio_context,BufferTag*tag);
427429

430+
externvoidTrackNewBufferPin(Bufferbuf);
431+
428432
/* solely to make it easier to write tests */
429433
externboolStartBufferIO(BufferDesc*buf,boolforInput,boolnowait);
430434
externvoidTerminateBufferIO(BufferDesc*buf,boolclear_dirty,uint32set_flag_bits,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp