Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1b02807

Browse files
committed
Check for conflicting queries during replay of gistvacuumpage()
013ebc0 implements so-called GiST microvacuum. That is, gistgettuple() marks index tuples as dead when kill_prior_tuple is set. Later, when new tuple insertion claims page space, those dead index tuples are physically deleted from page. When this deletion is replayed on standby, it might conflict with read-only queries. But 013ebc0 doesn't handle this. That may lead to disappearance of some tuples from read-only snapshots on standby. This commit implements resolving of conflicts between replay of GiST microvacuum and standby queries. On the master we implement new WAL record type XLOG_GIST_DELETE, which comprises necessary information. On stable releases we've to be tricky to keep WAL compatibility. Information required for conflict processing is just appended to data of XLOG_GIST_PAGE_UPDATE record. So, PostgreSQL version, which doesn't know about conflict processing, will just ignore that. Reported-by: Andres Freund. Diagnosed-by: Andres Freund. Discussion: https://postgr.es/m/20181212224524.scafnlyjindmrbe6%40alap3.anarazel.de Author: Alexander Korotkov. Backpatch-through: 9.6
1 parent e13d8a7 commit 1b02807

File tree

5 files changed

+220
-16
lines changed

5 files changed

+220
-16
lines changed

‎src/backend/access/gist/gist.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
3434
boolunlockbuf,boolunlockleftchild);
3535
staticvoidgistfinishsplit(GISTInsertState*state,GISTInsertStack*stack,
3636
GISTSTATE*giststate,List*splitinfo,boolreleasebuf);
37-
staticvoidgistvacuumpage(Relationrel,Pagepage,Bufferbuffer);
37+
staticvoidgistvacuumpage(Relationrel,Pagepage,Bufferbuffer,
38+
RelationheapRel);
3839

3940

4041
#defineROTATEDIST(d) do { \
@@ -161,7 +162,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
161162
values,isnull, true/* size is currently bogus */ );
162163
itup->t_tid=*ht_ctid;
163164

164-
gistdoinsert(r,itup,0,giststate);
165+
gistdoinsert(r,itup,0,giststate,heapRel);
165166

166167
/* cleanup */
167168
MemoryContextSwitchTo(oldCxt);
@@ -207,7 +208,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
207208
BlockNumber*newblkno,
208209
Bufferleftchildbuf,
209210
List**splitinfo,
210-
boolmarkfollowright)
211+
boolmarkfollowright,
212+
RelationheapRel)
211213
{
212214
BlockNumberblkno=BufferGetBlockNumber(buffer);
213215
Pagepage=BufferGetPage(buffer);
@@ -248,7 +250,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
248250
*/
249251
if (is_split&&GistPageIsLeaf(page)&&GistPageHasGarbage(page))
250252
{
251-
gistvacuumpage(rel,page,buffer);
253+
gistvacuumpage(rel,page,buffer,heapRel);
252254
is_split=gistnospace(page,itup,ntup,oldoffnum,freespace);
253255
}
254256

@@ -524,7 +526,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
524526

525527
recptr=gistXLogUpdate(buffer,
526528
deloffs,ndeloffs,itup,ntup,
527-
leftchildbuf);
529+
leftchildbuf,NULL);
528530

529531
PageSetLSN(page,recptr);
530532
}
@@ -572,7 +574,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
572574
* so it does not bother releasing palloc'd allocations.
573575
*/
574576
void
575-
gistdoinsert(Relationr,IndexTupleitup,Sizefreespace,GISTSTATE*giststate)
577+
gistdoinsert(Relationr,IndexTupleitup,Sizefreespace,
578+
GISTSTATE*giststate,RelationheapRel)
576579
{
577580
ItemIdiid;
578581
IndexTupleidxtuple;
@@ -584,6 +587,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
584587
memset(&state,0,sizeof(GISTInsertState));
585588
state.freespace=freespace;
586589
state.r=r;
590+
state.heapRel=heapRel;
587591

588592
/* Start from the root */
589593
firststack.blkno=GIST_ROOT_BLKNO;
@@ -1194,7 +1198,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
11941198
oldoffnum,NULL,
11951199
leftchild,
11961200
&splitinfo,
1197-
true);
1201+
true,
1202+
state->heapRel);
11981203

11991204
/*
12001205
* Before recursing up in case the page was split, release locks on the
@@ -1493,7 +1498,7 @@ freeGISTstate(GISTSTATE *giststate)
14931498
* Function assumes that buffer is exclusively locked.
14941499
*/
14951500
staticvoid
1496-
gistvacuumpage(Relationrel,Pagepage,Bufferbuffer)
1501+
gistvacuumpage(Relationrel,Pagepage,Bufferbuffer,RelationheapRel)
14971502
{
14981503
OffsetNumberdeletable[MaxIndexTuplesPerPage];
14991504
intndeletable=0;
@@ -1541,7 +1546,8 @@ gistvacuumpage(Relation rel, Page page, Buffer buffer)
15411546

15421547
recptr=gistXLogUpdate(buffer,
15431548
deletable,ndeletable,
1544-
NULL,0,InvalidBuffer);
1549+
NULL,0,InvalidBuffer,
1550+
&heapRel->rd_node);
15451551

15461552
PageSetLSN(page,recptr);
15471553
}

‎src/backend/access/gist/gistbuild.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ typedef enum
5555
typedefstruct
5656
{
5757
Relationindexrel;
58+
Relationheaprel;
5859
GISTSTATE*giststate;
5960

6061
int64indtuples;/* number of tuples indexed */
@@ -121,6 +122,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
121122
intfillfactor;
122123

123124
buildstate.indexrel=index;
125+
buildstate.heaprel=heap;
124126
if (index->rd_options)
125127
{
126128
/* Get buffering mode from the options string */
@@ -483,7 +485,7 @@ gistBuildCallback(Relation index,
483485
* locked, we call gistdoinsert directly.
484486
*/
485487
gistdoinsert(index,itup,buildstate->freespace,
486-
buildstate->giststate);
488+
buildstate->giststate,buildstate->heaprel);
487489
}
488490

489491
/* Update tuple count and total size. */
@@ -689,7 +691,8 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
689691
itup,ntup,oldoffnum,&placed_to_blk,
690692
InvalidBuffer,
691693
&splitinfo,
692-
false);
694+
false,
695+
buildstate->heaprel);
693696

694697
/*
695698
* If this is a root split, update the root path item kept in memory. This

‎src/backend/access/gist/gistvacuum.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,8 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
224224

225225
recptr=gistXLogUpdate(buffer,
226226
todelete,ntodelete,
227-
NULL,0,InvalidBuffer);
227+
NULL,0,InvalidBuffer,
228+
NULL);
228229
PageSetLSN(page,recptr);
229230
}
230231
else

‎src/backend/access/gist/gistxlog.c

Lines changed: 192 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414
#include"postgres.h"
1515

1616
#include"access/gist_private.h"
17+
#include"access/heapam_xlog.h"
18+
#include"access/transam.h"
1719
#include"access/xloginsert.h"
1820
#include"access/xlogutils.h"
21+
#include"miscadmin.h"
22+
#include"storage/procarray.h"
1923
#include"utils/memutils.h"
2024

2125
staticMemoryContextopCtx;/* working memory for operations */
@@ -58,6 +62,155 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
5862
UnlockReleaseBuffer(buffer);
5963
}
6064

65+
/*
66+
* Get the latestRemovedXid from the heap pages pointed at by the index
67+
* tuples being deleted. See also btree_xlog_delete_get_latestRemovedXid,
68+
* on which this function is based.
69+
*/
70+
staticTransactionId
71+
gistRedoPageUpdateRecordGetLatestRemovedXid(XLogReaderState*record)
72+
{
73+
gistxlogPageUpdate*xlrec= (gistxlogPageUpdate*)XLogRecGetData(record);
74+
OffsetNumber*todelete;
75+
Bufferibuffer,
76+
hbuffer;
77+
Pageipage,
78+
hpage;
79+
RelFileNodernode,
80+
*hnode;
81+
BlockNumberblkno;
82+
ItemIdiitemid,
83+
hitemid;
84+
IndexTupleitup;
85+
HeapTupleHeaderhtuphdr;
86+
BlockNumberhblkno;
87+
OffsetNumberhoffnum;
88+
TransactionIdlatestRemovedXid=InvalidTransactionId;
89+
inti;
90+
91+
/*
92+
* If there's nothing running on the standby we don't need to derive a
93+
* full latestRemovedXid value, so use a fast path out of here. This
94+
* returns InvalidTransactionId, and so will conflict with all HS
95+
* transactions; but since we just worked out that that's zero people,
96+
* it's OK.
97+
*
98+
* XXX There is a race condition here, which is that a new backend might
99+
* start just after we look. If so, it cannot need to conflict, but this
100+
* coding will result in throwing a conflict anyway.
101+
*/
102+
if (CountDBBackends(InvalidOid)==0)
103+
returnlatestRemovedXid;
104+
105+
/*
106+
* In what follows, we have to examine the previous state of the index
107+
* page, as well as the heap page(s) it points to. This is only valid if
108+
* WAL replay has reached a consistent database state; which means that
109+
* the preceding check is not just an optimization, but is *necessary*. We
110+
* won't have let in any user sessions before we reach consistency.
111+
*/
112+
if (!reachedConsistency)
113+
elog(PANIC,"gistRedoDeleteRecordGetLatestRemovedXid: cannot operate with inconsistent data");
114+
115+
/*
116+
* Get index page. If the DB is consistent, this should not fail, nor
117+
* should any of the heap page fetches below. If one does, we return
118+
* InvalidTransactionId to cancel all HS transactions. That's probably
119+
* overkill, but it's safe, and certainly better than panicking here.
120+
*/
121+
XLogRecGetBlockTag(record,0,&rnode,NULL,&blkno);
122+
ibuffer=XLogReadBufferExtended(rnode,MAIN_FORKNUM,blkno,RBM_NORMAL);
123+
if (!BufferIsValid(ibuffer))
124+
returnInvalidTransactionId;
125+
LockBuffer(ibuffer,BUFFER_LOCK_EXCLUSIVE);
126+
ipage= (Page)BufferGetPage(ibuffer);
127+
128+
/*
129+
* Loop through the deleted index items to obtain the TransactionId from
130+
* the heap items they point to.
131+
*/
132+
hnode= (RelFileNode*) ((char*)xlrec+sizeof(gistxlogPageUpdate));
133+
todelete= (OffsetNumber*) ((char*)hnode+sizeof(RelFileNode));
134+
135+
for (i=0;i<xlrec->ntodelete;i++)
136+
{
137+
/*
138+
* Identify the index tuple about to be deleted
139+
*/
140+
iitemid=PageGetItemId(ipage,todelete[i]);
141+
itup= (IndexTuple)PageGetItem(ipage,iitemid);
142+
143+
/*
144+
* Locate the heap page that the index tuple points at
145+
*/
146+
hblkno=ItemPointerGetBlockNumber(&(itup->t_tid));
147+
hbuffer=XLogReadBufferExtended(*hnode,MAIN_FORKNUM,hblkno,RBM_NORMAL);
148+
if (!BufferIsValid(hbuffer))
149+
{
150+
UnlockReleaseBuffer(ibuffer);
151+
returnInvalidTransactionId;
152+
}
153+
LockBuffer(hbuffer,BUFFER_LOCK_SHARE);
154+
hpage= (Page)BufferGetPage(hbuffer);
155+
156+
/*
157+
* Look up the heap tuple header that the index tuple points at by
158+
* using the heap node supplied with the xlrec. We can't use
159+
* heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
160+
* Note that we are not looking at tuple data here, just headers.
161+
*/
162+
hoffnum=ItemPointerGetOffsetNumber(&(itup->t_tid));
163+
hitemid=PageGetItemId(hpage,hoffnum);
164+
165+
/*
166+
* Follow any redirections until we find something useful.
167+
*/
168+
while (ItemIdIsRedirected(hitemid))
169+
{
170+
hoffnum=ItemIdGetRedirect(hitemid);
171+
hitemid=PageGetItemId(hpage,hoffnum);
172+
CHECK_FOR_INTERRUPTS();
173+
}
174+
175+
/*
176+
* If the heap item has storage, then read the header and use that to
177+
* set latestRemovedXid.
178+
*
179+
* Some LP_DEAD items may not be accessible, so we ignore them.
180+
*/
181+
if (ItemIdHasStorage(hitemid))
182+
{
183+
htuphdr= (HeapTupleHeader)PageGetItem(hpage,hitemid);
184+
185+
HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr,&latestRemovedXid);
186+
}
187+
elseif (ItemIdIsDead(hitemid))
188+
{
189+
/*
190+
* Conjecture: if hitemid is dead then it had xids before the xids
191+
* marked on LP_NORMAL items. So we just ignore this item and move
192+
* onto the next, for the purposes of calculating
193+
* latestRemovedxids.
194+
*/
195+
}
196+
else
197+
Assert(!ItemIdIsUsed(hitemid));
198+
199+
UnlockReleaseBuffer(hbuffer);
200+
}
201+
202+
UnlockReleaseBuffer(ibuffer);
203+
204+
/*
205+
* If all heap tuples were LP_DEAD then we will be returning
206+
* InvalidTransactionId here, which avoids conflicts. This matches
207+
* existing logic which assumes that LP_DEAD tuples must already be older
208+
* than the latestRemovedXid on the cleanup record that set them as
209+
* LP_DEAD, hence must already have generated a conflict.
210+
*/
211+
returnlatestRemovedXid;
212+
}
213+
61214
/*
62215
* redo any page update (except page split)
63216
*/
@@ -69,6 +222,34 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
69222
Bufferbuffer;
70223
Pagepage;
71224

225+
/*
226+
* If we have any conflict processing to do, it must happen before we
227+
* update the page.
228+
*
229+
* Support for conflict processing in GiST has been backpatched. This is
230+
* why we have to use tricky way of saving WAL-compatibility between minor
231+
* versions. Information required for conflict processing is just
232+
* appended to data of XLOG_GIST_PAGE_UPDATE record. So, PostgreSQL
233+
* version, which doesn't know about conflict processing, will just ignore
234+
* that.
235+
*
236+
* GiST delete records can conflict with standby queries. You might think
237+
* that vacuum records would conflict as well, but we've handled that
238+
* already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
239+
* cleaned by the vacuum of the heap and so we can resolve any conflicts
240+
* just once when that arrives. After that we know that no conflicts
241+
* exist from individual gist vacuum records on that index.
242+
*/
243+
if (InHotStandby&&XLogRecGetDataLen(record)>sizeof(gistxlogPageUpdate))
244+
{
245+
TransactionIdlatestRemovedXid=gistRedoPageUpdateRecordGetLatestRemovedXid(record);
246+
RelFileNodernode;
247+
248+
XLogRecGetBlockTag(record,0,&rnode,NULL,NULL);
249+
250+
ResolveRecoveryConflictWithSnapshot(latestRemovedXid,rnode);
251+
}
252+
72253
if (XLogReadBufferForRedo(record,0,&buffer)==BLK_NEEDS_REDO)
73254
{
74255
char*begin;
@@ -390,7 +571,7 @@ XLogRecPtr
390571
gistXLogUpdate(Bufferbuffer,
391572
OffsetNumber*todelete,intntodelete,
392573
IndexTuple*itup,intituplen,
393-
Bufferleftchildbuf)
574+
Bufferleftchildbuf,RelFileNode*hnode)
394575
{
395576
gistxlogPageUpdatexlrec;
396577
inti;
@@ -402,6 +583,16 @@ gistXLogUpdate(Buffer buffer,
402583
XLogBeginInsert();
403584
XLogRegisterData((char*)&xlrec,sizeof(gistxlogPageUpdate));
404585

586+
/*
587+
* Append the information required for standby conflict processing if it
588+
* is provided by caller.
589+
*/
590+
if (hnode)
591+
{
592+
XLogRegisterData((char*)hnode,sizeof(RelFileNode));
593+
XLogRegisterData((char*)todelete,sizeof(OffsetNumber)*ntodelete);
594+
}
595+
405596
XLogRegisterBuffer(0,buffer,REGBUF_STANDARD);
406597
XLogRegisterBufData(0, (char*)todelete,sizeof(OffsetNumber)*ntodelete);
407598

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp