Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit719c84c

Browse files
committed
Extend relations multiple blocks at a time to improve scalability.
Contention on the relation extension lock can become quite fierce when multiple processes are inserting data into the same relation at the same time at a high rate. Experimentation shows that extending the relation multiple blocks at a time improves scalability. Dilip Kumar, reviewed by Petr Jelinek, Amit Kapila, and me.
1 parent 8643b91 commit 719c84c

File tree

7 files changed

+271
-3
lines changed

7 files changed

+271
-3
lines changed

‎src/backend/access/heap/hio.c

Lines changed: 109 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,75 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
168168
}
169169
}
170170

171+
/*
172+
* Extend a relation by multiple blocks to avoid future contention on the
173+
* relation extension lock. Our goal is to pre-extend the relation by an
174+
* amount which ramps up as the degree of contention ramps up, but limiting
175+
* the result to some sane overall value.
176+
*/
177+
staticvoid
178+
RelationAddExtraBlocks(Relationrelation,BulkInsertStatebistate)
179+
{
180+
Pagepage;
181+
BlockNumberblockNum=InvalidBlockNumber,
182+
firstBlock=InvalidBlockNumber;
183+
intextraBlocks=0;
184+
intlockWaiters=0;
185+
Sizefreespace=0;
186+
Bufferbuffer;
187+
188+
/* Use the length of the lock wait queue to judge how much to extend. */
189+
lockWaiters=RelationExtensionLockWaiterCount(relation);
190+
if (lockWaiters <=0)
191+
return;
192+
193+
/*
194+
* It might seem like multiplying the number of lock waiters by as much
195+
* as 20 is too aggressive, but benchmarking revealed that smaller numbers
196+
* were insufficient. 512 is just an arbitrary cap to prevent pathological
197+
* results.
198+
*/
199+
extraBlocks=Min(512,lockWaiters*20);
200+
201+
while (extraBlocks-- >=0)
202+
{
203+
/* Ouch - an unnecessary lseek() each time through the loop! */
204+
buffer=ReadBufferBI(relation,P_NEW,bistate);
205+
206+
/* Extend by one page. */
207+
LockBuffer(buffer,BUFFER_LOCK_EXCLUSIVE);
208+
page=BufferGetPage(buffer);
209+
PageInit(page,BufferGetPageSize(buffer),0);
210+
MarkBufferDirty(buffer);
211+
blockNum=BufferGetBlockNumber(buffer);
212+
freespace=PageGetHeapFreeSpace(page);
213+
UnlockReleaseBuffer(buffer);
214+
215+
/* Remember first block number thus added. */
216+
if (firstBlock==InvalidBlockNumber)
217+
firstBlock=blockNum;
218+
219+
/*
220+
* Immediately update the bottom level of the FSM. This has a good
221+
* chance of making this page visible to other concurrently inserting
222+
* backends, and we want that to happen without delay.
223+
*/
224+
RecordPageWithFreeSpace(relation,blockNum,freespace);
225+
}
226+
227+
/*
228+
* Updating the upper levels of the free space map is too expensive
229+
* to do for every block, but it's worth doing once at the end to make
230+
* sure that subsequent insertion activity sees all of those nifty free
231+
* pages we just inserted.
232+
*
233+
* Note that we're using the freespace value that was reported for the
234+
* last block we added as if it were the freespace value for every block
235+
* we added. That's actually true, because they're all equally empty.
236+
*/
237+
UpdateFreeSpaceMap(relation,firstBlock,blockNum,freespace);
238+
}
239+
171240
/*
172241
* RelationGetBufferForTuple
173242
*
@@ -233,8 +302,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
233302
booluse_fsm= !(options&HEAP_INSERT_SKIP_FSM);
234303
Bufferbuffer=InvalidBuffer;
235304
Pagepage;
236-
SizepageFreeSpace,
237-
saveFreeSpace;
305+
SizepageFreeSpace=0,
306+
saveFreeSpace=0;
238307
BlockNumbertargetBlock,
239308
otherBlock;
240309
boolneedLock;
@@ -308,6 +377,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
308377
}
309378
}
310379

380+
loop:
311381
while (targetBlock!=InvalidBlockNumber)
312382
{
313383
/*
@@ -440,10 +510,46 @@ RelationGetBufferForTuple(Relation relation, Size len,
440510
*/
441511
needLock= !RELATION_IS_LOCAL(relation);
442512

513+
/*
514+
* If we need the lock but are not able to acquire it immediately, we'll
515+
* consider extending the relation by multiple blocks at a time to manage
516+
* contention on the relation extension lock. However, this only makes
517+
* sense if we're using the FSM; otherwise, there's no point.
518+
*/
443519
if (needLock)
444-
LockRelationForExtension(relation,ExclusiveLock);
520+
{
521+
if (!use_fsm)
522+
LockRelationForExtension(relation,ExclusiveLock);
523+
elseif (!ConditionalLockRelationForExtension(relation,ExclusiveLock))
524+
{
525+
/* Couldn't get the lock immediately; wait for it. */
526+
LockRelationForExtension(relation,ExclusiveLock);
527+
528+
/*
529+
* Check if some other backend has extended a block for us while
530+
* we were waiting on the lock.
531+
*/
532+
targetBlock=GetPageWithFreeSpace(relation,len+saveFreeSpace);
533+
534+
/*
535+
* If some other waiter has already extended the relation, we
536+
* don't need to do so; just use the existing freespace.
537+
*/
538+
if (targetBlock!=InvalidBlockNumber)
539+
{
540+
UnlockRelationForExtension(relation,ExclusiveLock);
541+
gotoloop;
542+
}
543+
544+
/* Time to bulk-extend. */
545+
RelationAddExtraBlocks(relation,bistate);
546+
}
547+
}
445548

446549
/*
550+
* In addition to whatever extension we performed above, we always add
551+
* at least one block to satisfy our own request.
552+
*
447553
* XXX This does an lseek - rather expensive - but at the moment it is the
448554
* only way to accurately determine how many blocks are in a relation. Is
449555
* it worth keeping an accurate file length in shared memory someplace,

‎src/backend/storage/freespace/freespace.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
109109
uint8newValue,uint8minValue);
110110
staticBlockNumberfsm_search(Relationrel,uint8min_cat);
111111
staticuint8fsm_vacuum_page(Relationrel,FSMAddressaddr,bool*eof);
112+
staticBlockNumberfsm_get_lastblckno(Relationrel,FSMAddressaddr);
113+
staticvoidfsm_update_recursive(Relationrel,FSMAddressaddr,uint8new_cat);
112114

113115

114116
/******** Public API ********/
@@ -188,6 +190,46 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
188190
fsm_set_and_search(rel,addr,slot,new_cat,0);
189191
}
190192

193+
/*
194+
* Update the upper levels of the free space map all the way up to the root
195+
* to make sure we don't lose track of new blocks we just inserted. This is
196+
* intended to be used after adding many new blocks to the relation; we judge
197+
* it not worth updating the upper levels of the tree every time data for
198+
* a single page changes, but for a bulk-extend it's worth it.
199+
*/
200+
void
201+
UpdateFreeSpaceMap(Relationrel,BlockNumberstartBlkNum,
202+
BlockNumberendBlkNum,Sizefreespace)
203+
{
204+
intnew_cat=fsm_space_avail_to_cat(freespace);
205+
FSMAddressaddr;
206+
uint16slot;
207+
BlockNumberblockNum;
208+
BlockNumberlastBlkOnPage;
209+
210+
blockNum=startBlkNum;
211+
212+
while (blockNum <=endBlkNum)
213+
{
214+
/*
215+
* Find FSM address for this block; update tree all the way to the
216+
* root.
217+
*/
218+
addr=fsm_get_location(blockNum,&slot);
219+
fsm_update_recursive(rel,addr,new_cat);
220+
221+
/*
222+
* Get the last block number on this FSM page. If that's greater
223+
* than or equal to our endBlkNum, we're done. Otherwise, advance
224+
* to the first block on the next page.
225+
*/
226+
lastBlkOnPage=fsm_get_lastblckno(rel,addr);
227+
if (lastBlkOnPage >=endBlkNum)
228+
break;
229+
blockNum=lastBlkOnPage+1;
230+
}
231+
}
232+
191233
/*
192234
* XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
193235
*WAL replay
@@ -788,3 +830,42 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
788830

789831
returnmax_avail;
790832
}
833+
834+
/*
835+
* Return the last block number stored on the FSM page at the given
836+
* address.
837+
*/
838+
staticBlockNumber
839+
fsm_get_lastblckno(Relationrel,FSMAddressaddr)
840+
{
841+
intslot;
842+
843+
/*
844+
* Get the last slot number on the given address and convert that to
845+
* block number
846+
*/
847+
slot=SlotsPerFSMPage-1;
848+
returnfsm_get_heap_blk(addr,slot);
849+
}
850+
851+
/*
852+
* Recursively update the FSM tree from the given address
853+
* all the way up to the root.
854+
*/
855+
staticvoid
856+
fsm_update_recursive(Relationrel,FSMAddressaddr,uint8new_cat)
857+
{
858+
uint16parentslot;
859+
FSMAddressparent;
860+
861+
if (addr.level==FSM_ROOT_LEVEL)
862+
return;
863+
864+
/*
865+
* Get the parent page and our slot in the parent page, and
866+
* update the information in that.
867+
*/
868+
parent=fsm_get_parent(addr,&parentslot);
869+
fsm_set_and_search(rel,parent,parentslot,new_cat,0);
870+
fsm_update_recursive(rel,parent,new_cat);
871+
}

‎src/backend/storage/lmgr/lmgr.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,41 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode)
340340
(void)LockAcquire(&tag,lockmode, false, false);
341341
}
342342

343+
/*
344+
*ConditionalLockRelationForExtension
345+
*
346+
* As above, but only lock if we can get the lock without blocking.
347+
* Returns TRUE iff the lock was acquired.
348+
*/
349+
bool
350+
ConditionalLockRelationForExtension(Relationrelation,LOCKMODElockmode)
351+
{
352+
LOCKTAGtag;
353+
354+
SET_LOCKTAG_RELATION_EXTEND(tag,
355+
relation->rd_lockInfo.lockRelId.dbId,
356+
relation->rd_lockInfo.lockRelId.relId);
357+
358+
return (LockAcquire(&tag,lockmode, false, true)!=LOCKACQUIRE_NOT_AVAIL);
359+
}
360+
361+
/*
362+
*RelationExtensionLockWaiterCount
363+
*
364+
* Count the number of processes waiting for the given relation extension lock.
365+
*/
366+
int
367+
RelationExtensionLockWaiterCount(Relationrelation)
368+
{
369+
LOCKTAGtag;
370+
371+
SET_LOCKTAG_RELATION_EXTEND(tag,
372+
relation->rd_lockInfo.lockRelId.dbId,
373+
relation->rd_lockInfo.lockRelId.relId);
374+
375+
returnLockWaiterCount(&tag);
376+
}
377+
343378
/*
344379
*UnlockRelationForExtension
345380
*/

‎src/backend/storage/lmgr/lock.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4380,3 +4380,40 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
43804380
LockRelease(&tag,ShareLock, false);
43814381
return true;
43824382
}
4383+
4384+
/*
4385+
* LockWaiterCount
4386+
*
4387+
* Find the number of lock requesters on this locktag
4388+
*/
4389+
int
4390+
LockWaiterCount(constLOCKTAG*locktag)
4391+
{
4392+
LOCKMETHODIDlockmethodid=locktag->locktag_lockmethodid;
4393+
LOCK*lock;
4394+
boolfound;
4395+
uint32hashcode;
4396+
LWLock*partitionLock;
4397+
intwaiters=0;
4398+
4399+
if (lockmethodid <=0||lockmethodid >=lengthof(LockMethods))
4400+
elog(ERROR,"unrecognized lock method: %d",lockmethodid);
4401+
4402+
hashcode=LockTagHashCode(locktag);
4403+
partitionLock=LockHashPartitionLock(hashcode);
4404+
LWLockAcquire(partitionLock,LW_EXCLUSIVE);
4405+
4406+
lock= (LOCK*)hash_search_with_hash_value(LockMethodLockHash,
4407+
(constvoid*)locktag,
4408+
hashcode,
4409+
HASH_FIND,
4410+
&found);
4411+
if (found)
4412+
{
4413+
Assert(lock!=NULL);
4414+
waiters=lock->nRequested;
4415+
}
4416+
LWLockRelease(partitionLock);
4417+
4418+
returnwaiters;
4419+
}

‎src/include/storage/freespace.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,9 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
3232

3333
externvoidFreeSpaceMapTruncateRel(Relationrel,BlockNumbernblocks);
3434
externvoidFreeSpaceMapVacuum(Relationrel);
35+
externvoidUpdateFreeSpaceMap(Relationrel,
36+
BlockNumberfirtsBlkNum,
37+
BlockNumberlastBlkNum,
38+
Sizefreespace);
3539

3640
#endif/* FREESPACE_H_ */

‎src/include/storage/lmgr.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
5353
/* Lock a relation for extension */
5454
externvoidLockRelationForExtension(Relationrelation,LOCKMODElockmode);
5555
externvoidUnlockRelationForExtension(Relationrelation,LOCKMODElockmode);
56+
externboolConditionalLockRelationForExtension(Relationrelation,
57+
LOCKMODElockmode);
58+
externintRelationExtensionLockWaiterCount(Relationrelation);
5659

5760
/* Lock a page (currently only used within indexes) */
5861
externvoidLockPage(Relationrelation,BlockNumberblkno,LOCKMODElockmode);

‎src/include/storage/lock.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1,
574574
PGPROC*proc2);
575575
externvoidInitDeadLockChecking(void);
576576

577+
externintLockWaiterCount(constLOCKTAG*locktag);
578+
577579
#ifdefLOCK_DEBUG
578580
externvoidDumpLocks(PGPROC*proc);
579581
externvoidDumpAllLocks(void);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp