Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit218f515

Browse files
committed
Reduce page locking in GIN vacuum
GIN vacuum during cleaning posting tree can lock this whole tree for a longtime with by holding LockBufferForCleanup() on root. Patch changes it withtwo ways: first, cleanup lock will be taken only if there is an empty page(which should be deleted) and, second, it tries to lock only subtree, not thewhole posting tree.Author: Andrey Borodin with minor editorization by meReviewed-by: Jeff Davis, mehttps://commitfest.postgresql.org/13/896/
1 parent7356101 commit218f515

File tree

4 files changed

+145
-110
lines changed

4 files changed

+145
-110
lines changed

‎src/backend/access/gin/README

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,17 @@ deleted.
314314
The previous paragraph's reasoning only applies to searches, and only to
315315
posting trees. To protect from inserters following a downlink to a deleted
316316
page, vacuum simply locks out all concurrent insertions to the posting tree,
317-
by holding a super-exclusive lock on the posting tree root. Inserters hold a
318-
pin on the root page, but searches do not, so while new searches cannot begin
319-
while root page is locked, any already-in-progress scans can continue
320-
concurrently with vacuum. In the entry tree, we never delete pages.
317+
by holding a super-exclusive lock on the parent page of subtree with deletable
318+
pages. Inserters hold a pin on the root page, but searches do not, so while
319+
new searches cannot begin while root page is locked, any already-in-progress
320+
scans can continue concurrently with vacuum in corresponding subtree of
321+
posting tree. To exclude interference with readers vacuum takes exclusive
322+
locks in a depth-first scan in left-to-right order of page tuples. Leftmost
323+
page is never deleted. Thus before deleting any page we obtain exclusive
324+
lock on any left page, effectively excluding deadlock with any reader, despite
325+
taking parent lock before current and left lock after current. We take left
326+
lock not for a concurrency reasons, but rather in need to mark page dirty.
327+
In the entry tree, we never delete pages.
321328

322329
(This is quite different from the mechanism the btree indexam uses to make
323330
page-deletions safe; it stamps the deleted pages with an XID and keeps the

‎src/backend/access/gin/ginbtree.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
3131
/*
3232
* Lock buffer by needed method for search.
3333
*/
34-
staticint
34+
int
3535
ginTraverseLock(Bufferbuffer,boolsearchMode)
3636
{
3737
Pagepage;

‎src/backend/access/gin/ginvacuum.c

Lines changed: 131 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -109,75 +109,17 @@ xlogVacuumPage(Relation index, Buffer buffer)
109109
PageSetLSN(page,recptr);
110110
}
111111

112-
staticbool
113-
ginVacuumPostingTreeLeaves(GinVacuumState*gvs,BlockNumberblkno,boolisRoot,Buffer*rootBuffer)
114-
{
115-
Bufferbuffer;
116-
Pagepage;
117-
boolhasVoidPage= FALSE;
118-
MemoryContextoldCxt;
119-
120-
buffer=ReadBufferExtended(gvs->index,MAIN_FORKNUM,blkno,
121-
RBM_NORMAL,gvs->strategy);
122-
page=BufferGetPage(buffer);
123-
124-
/*
125-
* We should be sure that we don't concurrent with inserts, insert process
126-
* never release root page until end (but it can unlock it and lock
127-
* again). New scan can't start but previously started ones work
128-
* concurrently.
129-
*/
130-
if (isRoot)
131-
LockBufferForCleanup(buffer);
132-
else
133-
LockBuffer(buffer,GIN_EXCLUSIVE);
134-
135-
Assert(GinPageIsData(page));
136112

137-
if (GinPageIsLeaf(page))
138-
{
139-
oldCxt=MemoryContextSwitchTo(gvs->tmpCxt);
140-
ginVacuumPostingTreeLeaf(gvs->index,buffer,gvs);
141-
MemoryContextSwitchTo(oldCxt);
142-
MemoryContextReset(gvs->tmpCxt);
143-
144-
/* if root is a leaf page, we don't desire further processing */
145-
if (!isRoot&& !hasVoidPage&&GinDataLeafPageIsEmpty(page))
146-
hasVoidPage= TRUE;
147-
}
148-
else
149-
{
150-
OffsetNumberi;
151-
boolisChildHasVoid= FALSE;
152-
153-
for (i=FirstOffsetNumber;i <=GinPageGetOpaque(page)->maxoff;i++)
154-
{
155-
PostingItem*pitem=GinDataPageGetPostingItem(page,i);
156-
157-
if (ginVacuumPostingTreeLeaves(gvs,PostingItemGetBlockNumber(pitem), FALSE,NULL))
158-
isChildHasVoid= TRUE;
159-
}
160-
161-
if (isChildHasVoid)
162-
hasVoidPage= TRUE;
163-
}
113+
typedefstructDataPageDeleteStack
114+
{
115+
structDataPageDeleteStack*child;
116+
structDataPageDeleteStack*parent;
164117

165-
/*
166-
* if we have root and there are empty pages in tree, then we don't
167-
* release lock to go further processing and guarantee that tree is unused
168-
*/
169-
if (!(isRoot&&hasVoidPage))
170-
{
171-
UnlockReleaseBuffer(buffer);
172-
}
173-
else
174-
{
175-
Assert(rootBuffer);
176-
*rootBuffer=buffer;
177-
}
118+
BlockNumberblkno;/* current block number */
119+
BlockNumberleftBlkno;/* rightest non-deleted page on left */
120+
boolisRoot;
121+
}DataPageDeleteStack;
178122

179-
returnhasVoidPage;
180-
}
181123

182124
/*
183125
* Delete a posting tree page.
@@ -194,8 +136,13 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
194136
BlockNumberrightlink;
195137

196138
/*
197-
* Lock the pages in the same order as an insertion would, to avoid
198-
* deadlocks: left, then right, then parent.
139+
* This function MUST be called only if someone of parent pages hold
140+
* exclusive cleanup lock. This guarantees that no insertions currently
141+
* happen in this subtree. Caller also acquire Exclusive lock on deletable
142+
* page and is acquiring and releasing exclusive lock on left page before.
143+
* Left page was locked and released. Then parent and this page are locked.
144+
* We acquire left page lock here only to mark page dirty after changing
145+
* right pointer.
199146
*/
200147
lBuffer=ReadBufferExtended(gvs->index,MAIN_FORKNUM,leftBlkno,
201148
RBM_NORMAL,gvs->strategy);
@@ -205,10 +152,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
205152
RBM_NORMAL,gvs->strategy);
206153

207154
LockBuffer(lBuffer,GIN_EXCLUSIVE);
208-
LockBuffer(dBuffer,GIN_EXCLUSIVE);
209-
if (!isParentRoot)/* parent is already locked by
210-
* LockBufferForCleanup() */
211-
LockBuffer(pBuffer,GIN_EXCLUSIVE);
212155

213156
START_CRIT_SECTION();
214157

@@ -272,26 +215,15 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
272215
PageSetLSN(BufferGetPage(lBuffer),recptr);
273216
}
274217

275-
if (!isParentRoot)
276-
LockBuffer(pBuffer,GIN_UNLOCK);
277218
ReleaseBuffer(pBuffer);
278219
UnlockReleaseBuffer(lBuffer);
279-
UnlockReleaseBuffer(dBuffer);
220+
ReleaseBuffer(dBuffer);
280221

281222
END_CRIT_SECTION();
282223

283224
gvs->result->pages_deleted++;
284225
}
285226

286-
typedefstructDataPageDeleteStack
287-
{
288-
structDataPageDeleteStack*child;
289-
structDataPageDeleteStack*parent;
290-
291-
BlockNumberblkno;/* current block number */
292-
BlockNumberleftBlkno;/* rightest non-deleted page on left */
293-
boolisRoot;
294-
}DataPageDeleteStack;
295227

296228
/*
297229
* scans posting tree and deletes empty pages
@@ -325,6 +257,10 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
325257

326258
buffer=ReadBufferExtended(gvs->index,MAIN_FORKNUM,blkno,
327259
RBM_NORMAL,gvs->strategy);
260+
261+
if(!isRoot)
262+
LockBuffer(buffer,GIN_EXCLUSIVE);
263+
328264
page=BufferGetPage(buffer);
329265

330266
Assert(GinPageIsData(page));
@@ -359,6 +295,9 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
359295
}
360296
}
361297

298+
if(!isRoot)
299+
LockBuffer(buffer,GIN_UNLOCK);
300+
362301
ReleaseBuffer(buffer);
363302

364303
if (!meDelete)
@@ -367,37 +306,124 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
367306
returnmeDelete;
368307
}
369308

370-
staticvoid
371-
ginVacuumPostingTree(GinVacuumState*gvs,BlockNumberrootBlkno)
309+
310+
/*
311+
* Scan through posting tree, delete empty tuples from leaf pages.
312+
* Also, this function collects empty subtrees (with all empty leafs).
313+
* For parents of these subtrees CleanUp lock is taken, then we call
314+
* ScanToDelete. This is done for every inner page, which points to
315+
* empty subtree.
316+
*/
317+
staticbool
318+
ginVacuumPostingTreeLeaves(GinVacuumState*gvs,BlockNumberblkno,boolisRoot)
372319
{
373-
BufferrootBuffer=InvalidBuffer;
374-
DataPageDeleteStackroot,
375-
*ptr,
376-
*tmp;
320+
Bufferbuffer;
321+
Pagepage;
322+
boolhasVoidPage= FALSE;
323+
MemoryContextoldCxt;
324+
325+
buffer=ReadBufferExtended(gvs->index,MAIN_FORKNUM,blkno,
326+
RBM_NORMAL,gvs->strategy);
327+
page=BufferGetPage(buffer);
328+
329+
ginTraverseLock(buffer,false);
377330

378-
if (ginVacuumPostingTreeLeaves(gvs,rootBlkno, TRUE,&rootBuffer)== FALSE)
331+
Assert(GinPageIsData(page));
332+
333+
if (GinPageIsLeaf(page))
379334
{
380-
Assert(rootBuffer==InvalidBuffer);
381-
return;
335+
oldCxt=MemoryContextSwitchTo(gvs->tmpCxt);
336+
ginVacuumPostingTreeLeaf(gvs->index,buffer,gvs);
337+
MemoryContextSwitchTo(oldCxt);
338+
MemoryContextReset(gvs->tmpCxt);
339+
340+
/* if root is a leaf page, we don't desire further processing */
341+
if (GinDataLeafPageIsEmpty(page))
342+
hasVoidPage= TRUE;
343+
344+
UnlockReleaseBuffer(buffer);
345+
346+
returnhasVoidPage;
382347
}
348+
else
349+
{
350+
OffsetNumberi;
351+
boolhasEmptyChild= FALSE;
352+
boolhasNonEmptyChild= FALSE;
353+
OffsetNumbermaxoff=GinPageGetOpaque(page)->maxoff;
354+
BlockNumber*children=palloc(sizeof(BlockNumber)* (maxoff+1));
355+
356+
/*
357+
* Read all children BlockNumbers.
358+
* Not sure it is safe if there are many concurrent vacuums.
359+
*/
383360

384-
memset(&root,0,sizeof(DataPageDeleteStack));
385-
root.leftBlkno=InvalidBlockNumber;
386-
root.isRoot= TRUE;
361+
for (i=FirstOffsetNumber;i <=maxoff;i++)
362+
{
363+
PostingItem*pitem=GinDataPageGetPostingItem(page,i);
387364

388-
vacuum_delay_point();
365+
children[i]=PostingItemGetBlockNumber(pitem);
366+
}
389367

390-
ginScanToDelete(gvs,rootBlkno, TRUE,&root,InvalidOffsetNumber);
368+
UnlockReleaseBuffer(buffer);
391369

392-
ptr=root.child;
393-
while (ptr)
394-
{
395-
tmp=ptr->child;
396-
pfree(ptr);
397-
ptr=tmp;
370+
for (i=FirstOffsetNumber;i <=maxoff;i++)
371+
{
372+
if (ginVacuumPostingTreeLeaves(gvs,children[i], FALSE))
373+
hasEmptyChild= TRUE;
374+
else
375+
hasNonEmptyChild= TRUE;
376+
}
377+
378+
pfree(children);
379+
380+
vacuum_delay_point();
381+
382+
/*
383+
* All subtree is empty - just return TRUE to indicate that parent must
384+
* do a cleanup. Unless we are ROOT an there is way to go upper.
385+
*/
386+
387+
if(hasEmptyChild&& !hasNonEmptyChild&& !isRoot)
388+
return TRUE;
389+
390+
if(hasEmptyChild)
391+
{
392+
DataPageDeleteStackroot,
393+
*ptr,
394+
*tmp;
395+
396+
buffer=ReadBufferExtended(gvs->index,MAIN_FORKNUM,blkno,
397+
RBM_NORMAL,gvs->strategy);
398+
LockBufferForCleanup(buffer);
399+
400+
memset(&root,0,sizeof(DataPageDeleteStack));
401+
root.leftBlkno=InvalidBlockNumber;
402+
root.isRoot= TRUE;
403+
404+
ginScanToDelete(gvs,blkno, TRUE,&root,InvalidOffsetNumber);
405+
406+
ptr=root.child;
407+
408+
while (ptr)
409+
{
410+
tmp=ptr->child;
411+
pfree(ptr);
412+
ptr=tmp;
413+
}
414+
415+
UnlockReleaseBuffer(buffer);
416+
}
417+
418+
/* Here we have deleted all empty subtrees */
419+
return FALSE;
398420
}
421+
}
399422

400-
UnlockReleaseBuffer(rootBuffer);
423+
staticvoid
424+
ginVacuumPostingTree(GinVacuumState*gvs,BlockNumberrootBlkno)
425+
{
426+
ginVacuumPostingTreeLeaves(gvs,rootBlkno, TRUE);
401427
}
402428

403429
/*

‎src/include/access/gin_private.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,4 +471,6 @@ ginCompareItemPointers(ItemPointer a, ItemPointer b)
471471
return-1;
472472
}
473473

474+
externintginTraverseLock(Bufferbuffer,boolsearchMode);
475+
474476
#endif/* GIN_PRIVATE_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp