Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit12f3867

Browse files
committed
bufmgr: Support multiple in-progress IOs by using resowner
A future patch will add support for extending relations by multiple blocks atonce. To be concurrency safe, the buffers for those blocks need to be markedas BM_IO_IN_PROGRESS. Until now we only had infrastructure for recovering froman IO error for a single buffer. This commit extends that infrastructure tomultiple buffers by using the resource owner infrastructure.This commit increases the size of the ResourceOwnerData struct, which appearsto have a just about measurable overhead in very extreme workloads. Mediumterm we are planning to substantially shrink the size ofResourceOwnerData. Short term the increase is small enough to not worry aboutit for now.Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>Discussion:https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.deDiscussion:https://postgr.es/m/20221029200025.w7bvlgvamjfo6z44@awork3.anarazel.de
1 parent16dc270 commit12f3867

File tree

9 files changed

+102
-53
lines changed

9 files changed

+102
-53
lines changed

‎src/backend/access/transam/xact.c‎

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2719,8 +2719,7 @@ AbortTransaction(void)
27192719
pgstat_report_wait_end();
27202720
pgstat_progress_end_command();
27212721

2722-
/* Clean up buffer I/O and buffer context locks, too */
2723-
AbortBufferIO();
2722+
/* Clean up buffer context locks, too */
27242723
UnlockBuffers();
27252724

27262725
/* Reset WAL record construction state */
@@ -5080,7 +5079,6 @@ AbortSubTransaction(void)
50805079

50815080
pgstat_report_wait_end();
50825081
pgstat_progress_end_command();
5083-
AbortBufferIO();
50845082
UnlockBuffers();
50855083

50865084
/* Reset WAL record construction state */

‎src/backend/postmaster/autovacuum.c‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,6 @@ AutoVacLauncherMain(int argc, char *argv[])
526526
*/
527527
LWLockReleaseAll();
528528
pgstat_report_wait_end();
529-
AbortBufferIO();
530529
UnlockBuffers();
531530
/* this is probably dead code, but let's be safe: */
532531
if (AuxProcessResourceOwner)

‎src/backend/postmaster/bgwriter.c‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ BackgroundWriterMain(void)
167167
*/
168168
LWLockReleaseAll();
169169
ConditionVariableCancelSleep();
170-
AbortBufferIO();
171170
UnlockBuffers();
172171
ReleaseAuxProcessResources(false);
173172
AtEOXact_Buffers(false);

‎src/backend/postmaster/checkpointer.c‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,6 @@ CheckpointerMain(void)
271271
LWLockReleaseAll();
272272
ConditionVariableCancelSleep();
273273
pgstat_report_wait_end();
274-
AbortBufferIO();
275274
UnlockBuffers();
276275
ReleaseAuxProcessResources(false);
277276
AtEOXact_Buffers(false);

‎src/backend/postmaster/walwriter.c‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ WalWriterMain(void)
163163
LWLockReleaseAll();
164164
ConditionVariableCancelSleep();
165165
pgstat_report_wait_end();
166-
AbortBufferIO();
167166
UnlockBuffers();
168167
ReleaseAuxProcessResources(false);
169168
AtEOXact_Buffers(false);

‎src/backend/storage/buffer/bufmgr.c‎

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,6 @@ intcheckpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER;
159159
intbgwriter_flush_after=DEFAULT_BGWRITER_FLUSH_AFTER;
160160
intbackend_flush_after=DEFAULT_BACKEND_FLUSH_AFTER;
161161

162-
/* local state for StartBufferIO and related functions */
163-
staticBufferDesc*InProgressBuf=NULL;
164-
staticboolIsForInput;
165-
166162
/* local state for LockBufferForCleanup */
167163
staticBufferDesc*PinCountWaitBuf=NULL;
168164

@@ -2705,7 +2701,6 @@ InitBufferPoolAccess(void)
27052701
staticvoid
27062702
AtProcExit_Buffers(intcode,Datumarg)
27072703
{
2708-
AbortBufferIO();
27092704
UnlockBuffers();
27102705

27112706
CheckForBufferLeaks();
@@ -4647,7 +4642,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
46474642
{
46484643
uint32buf_state;
46494644

4650-
Assert(!InProgressBuf);
4645+
ResourceOwnerEnlargeBufferIOs(CurrentResourceOwner);
46514646

46524647
for (;;)
46534648
{
@@ -4671,8 +4666,8 @@ StartBufferIO(BufferDesc *buf, bool forInput)
46714666
buf_state |=BM_IO_IN_PROGRESS;
46724667
UnlockBufHdr(buf,buf_state);
46734668

4674-
InProgressBuf=buf;
4675-
IsForInput=forInput;
4669+
ResourceOwnerRememberBufferIO(CurrentResourceOwner,
4670+
BufferDescriptorGetBuffer(buf));
46764671

46774672
return true;
46784673
}
@@ -4698,8 +4693,6 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
46984693
{
46994694
uint32buf_state;
47004695

4701-
Assert(buf==InProgressBuf);
4702-
47034696
buf_state=LockBufHdr(buf);
47044697

47054698
Assert(buf_state&BM_IO_IN_PROGRESS);
@@ -4711,13 +4704,14 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
47114704
buf_state |=set_flag_bits;
47124705
UnlockBufHdr(buf,buf_state);
47134706

4714-
InProgressBuf=NULL;
4707+
ResourceOwnerForgetBufferIO(CurrentResourceOwner,
4708+
BufferDescriptorGetBuffer(buf));
47154709

47164710
ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
47174711
}
47184712

47194713
/*
4720-
* AbortBufferIO: Clean upanyactive buffer I/O after an error.
4714+
* AbortBufferIO: Clean up active buffer I/O after an error.
47214715
*
47224716
*All LWLocks we might have held have been released,
47234717
*but we haven't yet released buffer pins, so the buffer is still pinned.
@@ -4726,46 +4720,42 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
47264720
*possible the error condition wasn't related to the I/O.
47274721
*/
47284722
void
4729-
AbortBufferIO(void)
4723+
AbortBufferIO(Bufferbuf)
47304724
{
4731-
BufferDesc*buf=InProgressBuf;
4725+
BufferDesc*buf_hdr=GetBufferDescriptor(buf-1);
4726+
uint32buf_state;
47324727

4733-
if (buf)
4734-
{
4735-
uint32buf_state;
4728+
buf_state=LockBufHdr(buf_hdr);
4729+
Assert(buf_state& (BM_IO_IN_PROGRESS |BM_TAG_VALID));
47364730

4737-
buf_state=LockBufHdr(buf);
4738-
Assert(buf_state&BM_IO_IN_PROGRESS);
4739-
if (IsForInput)
4740-
{
4741-
Assert(!(buf_state&BM_DIRTY));
4731+
if (!(buf_state&BM_VALID))
4732+
{
4733+
Assert(!(buf_state&BM_DIRTY));
4734+
UnlockBufHdr(buf_hdr,buf_state);
4735+
}
4736+
else
4737+
{
4738+
Assert(!(buf_state&BM_DIRTY));
4739+
UnlockBufHdr(buf_hdr,buf_state);
47424740

4743-
/* We'd better not think buffer is valid yet */
4744-
Assert(!(buf_state&BM_VALID));
4745-
UnlockBufHdr(buf,buf_state);
4746-
}
4747-
else
4741+
/* Issue notice if this is not the first failure... */
4742+
if (buf_state&BM_IO_ERROR)
47484743
{
4749-
Assert(buf_state&BM_DIRTY);
4750-
UnlockBufHdr(buf,buf_state);
4751-
/* Issue notice if this is not the first failure... */
4752-
if (buf_state&BM_IO_ERROR)
4753-
{
4754-
/* Buffer is pinned, so we can read tag without spinlock */
4755-
char*path;
4756-
4757-
path=relpathperm(BufTagGetRelFileLocator(&buf->tag),
4758-
BufTagGetForkNum(&buf->tag));
4759-
ereport(WARNING,
4760-
(errcode(ERRCODE_IO_ERROR),
4761-
errmsg("could not write block %u of %s",
4762-
buf->tag.blockNum,path),
4763-
errdetail("Multiple failures --- write error might be permanent.")));
4764-
pfree(path);
4765-
}
4744+
/* Buffer is pinned, so we can read tag without spinlock */
4745+
char*path;
4746+
4747+
path=relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
4748+
BufTagGetForkNum(&buf_hdr->tag));
4749+
ereport(WARNING,
4750+
(errcode(ERRCODE_IO_ERROR),
4751+
errmsg("could not write block %u of %s",
4752+
buf_hdr->tag.blockNum,path),
4753+
errdetail("Multiple failures --- write error might be permanent.")));
4754+
pfree(path);
47664755
}
4767-
TerminateBufferIO(buf, false,BM_IO_ERROR);
47684756
}
4757+
4758+
TerminateBufferIO(buf_hdr, false,BM_IO_ERROR);
47694759
}
47704760

47714761
/*

‎src/backend/utils/resowner/resowner.c‎

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ typedef struct ResourceOwnerData
121121

122122
/* We have built-in support for remembering: */
123123
ResourceArraybufferarr;/* owned buffers */
124+
ResourceArraybufferioarr;/* in-progress buffer IO */
124125
ResourceArraycatrefarr;/* catcache references */
125126
ResourceArraycatlistrefarr;/* catcache-list pins */
126127
ResourceArrayrelrefarr;/* relcache references */
@@ -441,6 +442,7 @@ ResourceOwnerCreate(ResourceOwner parent, const char *name)
441442
}
442443

443444
ResourceArrayInit(&(owner->bufferarr),BufferGetDatum(InvalidBuffer));
445+
ResourceArrayInit(&(owner->bufferioarr),BufferGetDatum(InvalidBuffer));
444446
ResourceArrayInit(&(owner->catrefarr),PointerGetDatum(NULL));
445447
ResourceArrayInit(&(owner->catlistrefarr),PointerGetDatum(NULL));
446448
ResourceArrayInit(&(owner->relrefarr),PointerGetDatum(NULL));
@@ -517,6 +519,24 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
517519

518520
if (phase==RESOURCE_RELEASE_BEFORE_LOCKS)
519521
{
522+
/*
523+
* Abort failed buffer IO. AbortBufferIO()->TerminateBufferIO() calls
524+
* ResourceOwnerForgetBufferIOs(), so we just have to iterate till
525+
* there are none.
526+
*
527+
* Needs to be before we release buffer pins.
528+
*
529+
* During a commit, there shouldn't be any in-progress IO.
530+
*/
531+
while (ResourceArrayGetAny(&(owner->bufferioarr),&foundres))
532+
{
533+
Bufferres=DatumGetBuffer(foundres);
534+
535+
if (isCommit)
536+
elog(PANIC,"lost track of buffer IO on buffer %u",res);
537+
AbortBufferIO(res);
538+
}
539+
520540
/*
521541
* Release buffer pins. Note that ReleaseBuffer will remove the
522542
* buffer entry from our array, so we just have to iterate till there
@@ -746,6 +766,7 @@ ResourceOwnerDelete(ResourceOwner owner)
746766

747767
/* And it better not own any resources, either */
748768
Assert(owner->bufferarr.nitems==0);
769+
Assert(owner->bufferioarr.nitems==0);
749770
Assert(owner->catrefarr.nitems==0);
750771
Assert(owner->catlistrefarr.nitems==0);
751772
Assert(owner->relrefarr.nitems==0);
@@ -775,6 +796,7 @@ ResourceOwnerDelete(ResourceOwner owner)
775796

776797
/* And free the object. */
777798
ResourceArrayFree(&(owner->bufferarr));
799+
ResourceArrayFree(&(owner->bufferioarr));
778800
ResourceArrayFree(&(owner->catrefarr));
779801
ResourceArrayFree(&(owner->catlistrefarr));
780802
ResourceArrayFree(&(owner->relrefarr));
@@ -976,6 +998,44 @@ ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
976998
buffer,owner->name);
977999
}
9781000

1001+
1002+
/*
1003+
* Make sure there is room for at least one more entry in a ResourceOwner's
1004+
* buffer array.
1005+
*
1006+
* This is separate from actually inserting an entry because if we run out
1007+
* of memory, it's critical to do so *before* acquiring the resource.
1008+
*/
1009+
void
1010+
ResourceOwnerEnlargeBufferIOs(ResourceOwnerowner)
1011+
{
1012+
/* We used to allow pinning buffers without a resowner, but no more */
1013+
Assert(owner!=NULL);
1014+
ResourceArrayEnlarge(&(owner->bufferioarr));
1015+
}
1016+
1017+
/*
1018+
* Remember that a buffer IO is owned by a ResourceOwner
1019+
*
1020+
* Caller must have previously done ResourceOwnerEnlargeBufferIOs()
1021+
*/
1022+
void
1023+
ResourceOwnerRememberBufferIO(ResourceOwnerowner,Bufferbuffer)
1024+
{
1025+
ResourceArrayAdd(&(owner->bufferioarr),BufferGetDatum(buffer));
1026+
}
1027+
1028+
/*
1029+
* Forget that a buffer IO is owned by a ResourceOwner
1030+
*/
1031+
void
1032+
ResourceOwnerForgetBufferIO(ResourceOwnerowner,Bufferbuffer)
1033+
{
1034+
if (!ResourceArrayRemove(&(owner->bufferioarr),BufferGetDatum(buffer)))
1035+
elog(PANIC,"buffer IO %d is not owned by resource owner %s",
1036+
buffer,owner->name);
1037+
}
1038+
9791039
/*
9801040
* Remember that a Local Lock is owned by a ResourceOwner
9811041
*

‎src/include/storage/bufmgr.h‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ extern bool ConditionalLockBufferForCleanup(Buffer buffer);
181181
externboolIsBufferCleanupOK(Bufferbuffer);
182182
externboolHoldingBufferPinThatDelaysRecovery(void);
183183

184-
externvoidAbortBufferIO(void);
184+
externvoidAbortBufferIO(Bufferbuffer);
185185

186186
externboolBgBufferSync(structWritebackContext*wb_context);
187187

‎src/include/utils/resowner_private.h‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ extern void ResourceOwnerEnlargeBuffers(ResourceOwner owner);
3030
externvoidResourceOwnerRememberBuffer(ResourceOwnerowner,Bufferbuffer);
3131
externvoidResourceOwnerForgetBuffer(ResourceOwnerowner,Bufferbuffer);
3232

33+
/* support for IO-in-progress management */
34+
externvoidResourceOwnerEnlargeBufferIOs(ResourceOwnerowner);
35+
externvoidResourceOwnerRememberBufferIO(ResourceOwnerowner,Bufferbuffer);
36+
externvoidResourceOwnerForgetBufferIO(ResourceOwnerowner,Bufferbuffer);
37+
3338
/* support for local lock management */
3439
externvoidResourceOwnerRememberLock(ResourceOwnerowner,LOCALLOCK*locallock);
3540
externvoidResourceOwnerForgetLock(ResourceOwnerowner,LOCALLOCK*locallock);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp