Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit43d1ed6

Browse files
committed
Predicate locking in GIN index
Predicate locks are used on per page basis only if fastupdate = off, inopposite case predicate lock on pending list will effectively lock whole index,to reduce locking overhead, just lock a relation. Entry and posting trees areessentially B-tree, so locks are acquired on leaf pages only.Author: Shubham Barai with some editorization by me and Dmitry IvanovReview by: Alexander Korotkov, Dmitry Ivanov, Fedor SigaevDiscussion:https://www.postgresql.org/message-id/flat/CALxAEPt5sWW+EwTaKUGFL5_XFcZ0MuGBcyJ70oqbWqr42YKR8Q@mail.gmail.com
1 parent019fa57 commit43d1ed6

File tree

11 files changed

+1054
-18
lines changed

11 files changed

+1054
-18
lines changed

‎src/backend/access/gin/ginbtree.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include"access/gin_private.h"
1818
#include"access/ginxlog.h"
1919
#include"access/xloginsert.h"
20+
#include"storage/predicate.h"
2021
#include"miscadmin.h"
2122
#include"utils/memutils.h"
2223
#include"utils/rel.h"
@@ -515,6 +516,19 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
515516
btree->fillRoot(btree,newrootpg,
516517
BufferGetBlockNumber(lbuffer),newlpage,
517518
BufferGetBlockNumber(rbuffer),newrpage);
519+
520+
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
521+
{
522+
523+
PredicateLockPageSplit(btree->index,
524+
BufferGetBlockNumber(stack->buffer),
525+
BufferGetBlockNumber(lbuffer));
526+
527+
PredicateLockPageSplit(btree->index,
528+
BufferGetBlockNumber(stack->buffer),
529+
BufferGetBlockNumber(rbuffer));
530+
}
531+
518532
}
519533
else
520534
{
@@ -524,6 +538,14 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
524538
GinPageGetOpaque(newrpage)->rightlink=savedRightLink;
525539
GinPageGetOpaque(newlpage)->flags |=GIN_INCOMPLETE_SPLIT;
526540
GinPageGetOpaque(newlpage)->rightlink=BufferGetBlockNumber(rbuffer);
541+
542+
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
543+
{
544+
545+
PredicateLockPageSplit(btree->index,
546+
BufferGetBlockNumber(stack->buffer),
547+
BufferGetBlockNumber(rbuffer));
548+
}
527549
}
528550

529551
/*

‎src/backend/access/gin/gindatapage.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include"access/xloginsert.h"
2020
#include"lib/ilist.h"
2121
#include"miscadmin.h"
22+
#include"storage/predicate.h"
2223
#include"utils/rel.h"
2324

2425
/*
@@ -1759,7 +1760,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
17591760
*/
17601761
BlockNumber
17611762
createPostingTree(Relationindex,ItemPointerData*items,uint32nitems,
1762-
GinStatsData*buildStats)
1763+
GinStatsData*buildStats,Bufferentrybuffer)
17631764
{
17641765
BlockNumberblkno;
17651766
Bufferbuffer;
@@ -1810,6 +1811,12 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
18101811
page=BufferGetPage(buffer);
18111812
blkno=BufferGetBlockNumber(buffer);
18121813

1814+
/*
1815+
* Copy a predicate lock from entry tree leaf (containing posting list)
1816+
* to posting tree.
1817+
*/
1818+
PredicateLockPageSplit(index,BufferGetBlockNumber(entrybuffer),blkno);
1819+
18131820
START_CRIT_SECTION();
18141821

18151822
PageRestoreTempPage(tmppage,page);
@@ -1904,6 +1911,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
19041911
btree.itemptr=insertdata.items[insertdata.curitem];
19051912
stack=ginFindLeafPage(&btree, false,NULL);
19061913

1914+
GinCheckForSerializableConflictIn(btree.index,NULL,stack->buffer);
19071915
ginInsertValue(&btree,stack,&insertdata,buildStats);
19081916
}
19091917
}

‎src/backend/access/gin/ginget.c

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#include"access/gin_private.h"
1818
#include"access/relscan.h"
1919
#include"miscadmin.h"
20+
#include"storage/predicate.h"
2021
#include"utils/datum.h"
2122
#include"utils/memutils.h"
23+
#include"utils/rel.h"
2224

2325
/* GUC parameter */
2426
intGinFuzzySearchLimit=0;
@@ -33,11 +35,25 @@ typedef struct pendingPosition
3335
}pendingPosition;
3436

3537

38+
/*
39+
* Place predicate lock on GIN page if needed.
40+
*/
41+
staticvoid
42+
GinPredicateLockPage(Relationindex,BlockNumberblkno,Snapshotsnapshot)
43+
{
44+
/*
45+
* When fast update is on then no need in locking pages, because we
46+
* anyway need to lock the whole index.
47+
*/
48+
if (!GinGetUseFastUpdate(index))
49+
PredicateLockPage(index,blkno,snapshot);
50+
}
51+
3652
/*
3753
* Goes to the next page if current offset is outside of bounds
3854
*/
3955
staticbool
40-
moveRightIfItNeeded(GinBtreeData*btree,GinBtreeStack*stack)
56+
moveRightIfItNeeded(GinBtreeData*btree,GinBtreeStack*stack,Snapshotsnapshot)
4157
{
4258
Pagepage=BufferGetPage(stack->buffer);
4359

@@ -52,6 +68,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
5268
stack->buffer=ginStepRight(stack->buffer,btree->index,GIN_SHARE);
5369
stack->blkno=BufferGetBlockNumber(stack->buffer);
5470
stack->off=FirstOffsetNumber;
71+
GinPredicateLockPage(btree->index,stack->blkno,snapshot);
5572
}
5673

5774
return true;
@@ -73,6 +90,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
7390
/* Descend to the leftmost leaf page */
7491
stack=ginScanBeginPostingTree(&btree,index,rootPostingTree,snapshot);
7592
buffer=stack->buffer;
93+
7694
IncrBufferRefCount(buffer);/* prevent unpin in freeGinBtreeStack */
7795

7896
freeGinBtreeStack(stack);
@@ -82,6 +100,11 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
82100
*/
83101
for (;;)
84102
{
103+
/*
104+
* Predicate lock each leaf page in posting tree
105+
*/
106+
GinPredicateLockPage(index,BufferGetBlockNumber(buffer),snapshot);
107+
85108
page=BufferGetPage(buffer);
86109
if ((GinPageGetOpaque(page)->flags&GIN_DELETED)==0)
87110
{
@@ -131,6 +154,12 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
131154
attnum=scanEntry->attnum;
132155
attr=TupleDescAttr(btree->ginstate->origTupdesc,attnum-1);
133156

157+
/*
158+
* Predicate lock entry leaf page, following pages will be locked by
159+
* moveRightIfItNeeded()
160+
*/
161+
GinPredicateLockPage(btree->index,stack->buffer,snapshot);
162+
134163
for (;;)
135164
{
136165
Pagepage;
@@ -141,7 +170,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
141170
/*
142171
* stack->off points to the interested entry, buffer is already locked
143172
*/
144-
if (moveRightIfItNeeded(btree,stack)== false)
173+
if (moveRightIfItNeeded(btree,stack,snapshot)== false)
145174
return true;
146175

147176
page=BufferGetPage(stack->buffer);
@@ -250,7 +279,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
250279
DatumnewDatum;
251280
GinNullCategorynewCategory;
252281

253-
if (moveRightIfItNeeded(btree,stack)== false)
282+
if (moveRightIfItNeeded(btree,stack,snapshot)== false)
254283
elog(ERROR,"lost saved point in index");/* must not happen !!! */
255284

256285
page=BufferGetPage(stack->buffer);
@@ -323,6 +352,7 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
323352
ginstate);
324353
stackEntry=ginFindLeafPage(&btreeEntry, true,snapshot);
325354
page=BufferGetPage(stackEntry->buffer);
355+
326356
/* ginFindLeafPage() will have already checked snapshot age. */
327357
needUnlock= true;
328358

@@ -370,6 +400,10 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
370400
{
371401
IndexTupleitup= (IndexTuple)PageGetItem(page,PageGetItemId(page,stackEntry->off));
372402

403+
/* Predicate lock visited entry leaf page */
404+
GinPredicateLockPage(ginstate->index,
405+
BufferGetBlockNumber(stackEntry->buffer),snapshot);
406+
373407
if (GinIsPostingTree(itup))
374408
{
375409
BlockNumberrootPostingTree=GinGetPostingTree(itup);
@@ -391,6 +425,12 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
391425
rootPostingTree,snapshot);
392426
entry->buffer=stack->buffer;
393427

428+
/*
429+
* Predicate lock visited posting tree page, following pages
430+
* will be locked by moveRightIfItNeeded or entryLoadMoreItems
431+
*/
432+
GinPredicateLockPage(ginstate->index,BufferGetBlockNumber(entry->buffer),snapshot);
433+
394434
/*
395435
* We keep buffer pinned because we need to prevent deletion of
396436
* page during scan. See GIN's vacuum implementation. RefCount is
@@ -493,7 +533,7 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
493533

494534
for (i=0;i<key->nentries-1;i++)
495535
{
496-
/* Pass all entries <= i asFALSE, and the rest as MAYBE */
536+
/* Pass all entries <= i asfalse, and the rest as MAYBE */
497537
for (j=0;j <=i;j++)
498538
key->entryRes[entryIndexes[j]]=GIN_FALSE;
499539
for (j=i+1;j<key->nentries;j++)
@@ -633,6 +673,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
633673
entry->btree.fullScan= false;
634674
stack=ginFindLeafPage(&entry->btree, true,snapshot);
635675

676+
GinPredicateLockPage(ginstate->index,BufferGetBlockNumber(stack->buffer),snapshot);
677+
636678
/* we don't need the stack, just the buffer. */
637679
entry->buffer=stack->buffer;
638680
IncrBufferRefCount(entry->buffer);
@@ -677,6 +719,10 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
677719
entry->buffer=ginStepRight(entry->buffer,
678720
ginstate->index,
679721
GIN_SHARE);
722+
723+
GinPredicateLockPage(ginstate->index,BufferGetBlockNumber(entry->buffer),snapshot);
724+
725+
680726
page=BufferGetPage(entry->buffer);
681727
}
682728
stepright= true;
@@ -1038,8 +1084,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
10381084
* lossy page even when none of the other entries match.
10391085
*
10401086
* Our strategy is to call the tri-state consistent function, with the
1041-
* lossy-page entries set to MAYBE, and all the other entriesFALSE. If it
1042-
* returnsFALSE, none of the lossy items alone are enough for a match, so
1087+
* lossy-page entries set to MAYBE, and all the other entriesfalse. If it
1088+
* returnsfalse, none of the lossy items alone are enough for a match, so
10431089
* we don't need to return a lossy-page pointer. Otherwise, return a
10441090
* lossy-page pointer to indicate that the whole heap page must be
10451091
* checked. (On subsequent calls, we'll do nothing until minItem is past
@@ -1700,7 +1746,8 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
17001746
}
17011747

17021748
/*
1703-
* Collect all matched rows from pending list into bitmap
1749+
* Collect all matched rows from pending list into bitmap. Also function
1750+
* takes PendingLockRelation if it's needed.
17041751
*/
17051752
staticvoid
17061753
scanPendingInsert(IndexScanDescscan,TIDBitmap*tbm,int64*ntids)
@@ -1730,9 +1777,24 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
17301777
{
17311778
/* No pending list, so proceed with normal scan */
17321779
UnlockReleaseBuffer(metabuffer);
1780+
1781+
/*
1782+
* If fast update is enabled, we acquire a predicate lock on the entire
1783+
* relation as fast update postpones the insertion of tuples into index
1784+
* structure due to which we can't detect rw conflicts.
1785+
*/
1786+
if (GinGetUseFastUpdate(scan->indexRelation))
1787+
PredicateLockRelation(scan->indexRelation,scan->xs_snapshot);
1788+
17331789
return;
17341790
}
17351791

1792+
/*
1793+
* Pending list is not empty, we need to lock the index doesn't despite on
1794+
* fastupdate state
1795+
*/
1796+
PredicateLockRelation(scan->indexRelation,scan->xs_snapshot);
1797+
17361798
pos.pendingBuffer=ReadBuffer(scan->indexRelation,blkno);
17371799
LockBuffer(pos.pendingBuffer,GIN_SHARE);
17381800
pos.firstOffset=FirstOffsetNumber;

‎src/backend/access/gin/gininsert.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include"storage/bufmgr.h"
2323
#include"storage/smgr.h"
2424
#include"storage/indexfsm.h"
25+
#include"storage/predicate.h"
2526
#include"utils/memutils.h"
2627
#include"utils/rel.h"
2728

@@ -48,7 +49,7 @@ static IndexTuple
4849
addItemPointersToLeafTuple(GinState*ginstate,
4950
IndexTupleold,
5051
ItemPointerData*items,uint32nitem,
51-
GinStatsData*buildStats)
52+
GinStatsData*buildStats,Bufferbuffer)
5253
{
5354
OffsetNumberattnum;
5455
Datumkey;
@@ -99,7 +100,8 @@ addItemPointersToLeafTuple(GinState *ginstate,
99100
postingRoot=createPostingTree(ginstate->index,
100101
oldItems,
101102
oldNPosting,
102-
buildStats);
103+
buildStats,
104+
buffer);
103105

104106
/* Now insert the TIDs-to-be-added into the posting tree */
105107
ginInsertItemPointers(ginstate->index,postingRoot,
@@ -127,7 +129,7 @@ static IndexTuple
127129
buildFreshLeafTuple(GinState*ginstate,
128130
OffsetNumberattnum,Datumkey,GinNullCategorycategory,
129131
ItemPointerData*items,uint32nitem,
130-
GinStatsData*buildStats)
132+
GinStatsData*buildStats,Bufferbuffer)
131133
{
132134
IndexTupleres=NULL;
133135
GinPostingList*compressedList;
@@ -157,7 +159,7 @@ buildFreshLeafTuple(GinState *ginstate,
157159
* Initialize a new posting tree with the TIDs.
158160
*/
159161
postingRoot=createPostingTree(ginstate->index,items,nitem,
160-
buildStats);
162+
buildStats,buffer);
161163

162164
/* And save the root link in the result tuple */
163165
GinSetPostingTree(res,postingRoot);
@@ -217,17 +219,19 @@ ginEntryInsert(GinState *ginstate,
217219
return;
218220
}
219221

222+
GinCheckForSerializableConflictIn(btree.index,NULL,stack->buffer);
220223
/* modify an existing leaf entry */
221224
itup=addItemPointersToLeafTuple(ginstate,itup,
222-
items,nitem,buildStats);
225+
items,nitem,buildStats,stack->buffer);
223226

224227
insertdata.isDelete= true;
225228
}
226229
else
227230
{
231+
GinCheckForSerializableConflictIn(btree.index,NULL,stack->buffer);
228232
/* no match, so construct a new leaf entry */
229233
itup=buildFreshLeafTuple(ginstate,attnum,key,category,
230-
items,nitem,buildStats);
234+
items,nitem,buildStats,stack->buffer);
231235
}
232236

233237
/* Insert the new or modified leaf tuple */
@@ -513,6 +517,18 @@ gininsert(Relation index, Datum *values, bool *isnull,
513517

514518
memset(&collector,0,sizeof(GinTupleCollector));
515519

520+
/*
521+
* With fastupdate on each scan and each insert begin with access to
522+
* pending list, so it effectively lock entire index. In this case
523+
* we aquire predicate lock and check for conflicts over index relation,
524+
* and hope that it will reduce locking overhead.
525+
*
526+
* Do not use GinCheckForSerializableConflictIn() here, because
527+
* it will do nothing (it does actual work only with fastupdate off).
528+
* Check for conflicts for entire index.
529+
*/
530+
CheckForSerializableConflictIn(index,NULL,InvalidBuffer);
531+
516532
for (i=0;i<ginstate->origTupdesc->natts;i++)
517533
ginHeapTupleFastCollect(ginstate,&collector,
518534
(OffsetNumber) (i+1),
@@ -523,6 +539,16 @@ gininsert(Relation index, Datum *values, bool *isnull,
523539
}
524540
else
525541
{
542+
GinStatsDatastats;
543+
544+
/*
545+
* Fastupdate is off but if pending list isn't empty then we need to
546+
* check conflicts with PredicateLockRelation in scanPendingInsert().
547+
*/
548+
ginGetStats(index,&stats);
549+
if (stats.nPendingPages>0)
550+
CheckForSerializableConflictIn(index,NULL,InvalidBuffer);
551+
526552
for (i=0;i<ginstate->origTupdesc->natts;i++)
527553
ginHeapTupleInsert(ginstate, (OffsetNumber) (i+1),
528554
values[i],isnull[i],

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp