Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c952eae

Browse files
committed
Check for conflicting queries during replay of gistvacuumpage()
013ebc0 implements so-called GiST microvacuum. That is, gistgettuple() marks index tuples as dead when kill_prior_tuple is set. Later, when new tuple insertion claims page space, those dead index tuples are physically deleted from page. When this deletion is replayed on standby, it might conflict with read-only queries. But 013ebc0 doesn't handle this. That may lead to disappearance of some tuples from read-only snapshots on standby.

This commit implements resolving of conflicts between replay of GiST microvacuum and standby queries. On the master we implement new WAL record type XLOG_GIST_DELETE, which comprises necessary information. On stable releases we've to be tricky to keep WAL compatibility. Information required for conflict processing is just appended to data of XLOG_GIST_PAGE_UPDATE record. So, PostgreSQL version, which doesn't know about conflict processing, will just ignore that.

Reported-by: Andres Freund
Diagnosed-by: Andres Freund
Discussion: https://postgr.es/m/20181212224524.scafnlyjindmrbe6%40alap3.anarazel.de
Author: Alexander Korotkov
Backpatch-through: 9.6
1 parent 7c15cef, commit c952eae

File tree

6 files changed

+298
-13
lines changed

6 files changed

+298
-13
lines changed

‎src/backend/access/gist/gist.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
3838
boolunlockbuf,boolunlockleftchild);
3939
staticvoidgistfinishsplit(GISTInsertState*state,GISTInsertStack*stack,
4040
GISTSTATE*giststate,List*splitinfo,boolreleasebuf);
41-
staticvoidgistvacuumpage(Relationrel,Pagepage,Bufferbuffer);
41+
staticvoidgistvacuumpage(Relationrel,Pagepage,Bufferbuffer,
42+
RelationheapRel);
4243

4344

4445
#defineROTATEDIST(d) do { \
@@ -172,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
172173
values,isnull, true/* size is currently bogus */ );
173174
itup->t_tid=*ht_ctid;
174175

175-
gistdoinsert(r,itup,0,giststate);
176+
gistdoinsert(r,itup,0,giststate,heapRel);
176177

177178
/* cleanup */
178179
MemoryContextSwitchTo(oldCxt);
@@ -218,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
218219
BlockNumber*newblkno,
219220
Bufferleftchildbuf,
220221
List**splitinfo,
221-
boolmarkfollowright)
222+
boolmarkfollowright,
223+
RelationheapRel)
222224
{
223225
BlockNumberblkno=BufferGetBlockNumber(buffer);
224226
Pagepage=BufferGetPage(buffer);
@@ -259,7 +261,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
259261
*/
260262
if (is_split&&GistPageIsLeaf(page)&&GistPageHasGarbage(page))
261263
{
262-
gistvacuumpage(rel,page,buffer);
264+
gistvacuumpage(rel,page,buffer,heapRel);
263265
is_split=gistnospace(page,itup,ntup,oldoffnum,freespace);
264266
}
265267

@@ -604,7 +606,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
604606
* so it does not bother releasing palloc'd allocations.
605607
*/
606608
void
607-
gistdoinsert(Relationr,IndexTupleitup,Sizefreespace,GISTSTATE*giststate)
609+
gistdoinsert(Relationr,IndexTupleitup,Sizefreespace,
610+
GISTSTATE*giststate,RelationheapRel)
608611
{
609612
ItemIdiid;
610613
IndexTupleidxtuple;
@@ -616,6 +619,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
616619
memset(&state,0,sizeof(GISTInsertState));
617620
state.freespace=freespace;
618621
state.r=r;
622+
state.heapRel=heapRel;
619623

620624
/* Start from the root */
621625
firststack.blkno=GIST_ROOT_BLKNO;
@@ -1232,7 +1236,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
12321236
oldoffnum,NULL,
12331237
leftchild,
12341238
&splitinfo,
1235-
true);
1239+
true,
1240+
state->heapRel);
12361241

12371242
/*
12381243
* Before recursing up in case the page was split, release locks on the
@@ -1543,7 +1548,7 @@ freeGISTstate(GISTSTATE *giststate)
15431548
* Function assumes that buffer is exclusively locked.
15441549
*/
15451550
staticvoid
1546-
gistvacuumpage(Relationrel,Pagepage,Bufferbuffer)
1551+
gistvacuumpage(Relationrel,Pagepage,Bufferbuffer,RelationheapRel)
15471552
{
15481553
OffsetNumberdeletable[MaxIndexTuplesPerPage];
15491554
intndeletable=0;
@@ -1589,9 +1594,9 @@ gistvacuumpage(Relation rel, Page page, Buffer buffer)
15891594
{
15901595
XLogRecPtrrecptr;
15911596

1592-
recptr=gistXLogUpdate(buffer,
1597+
recptr=gistXLogDelete(buffer,
15931598
deletable,ndeletable,
1594-
NULL,0,InvalidBuffer);
1599+
heapRel->rd_node);
15951600

15961601
PageSetLSN(page,recptr);
15971602
}

‎src/backend/access/gist/gistbuild.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ typedef enum
5656
typedefstruct
5757
{
5858
Relationindexrel;
59+
Relationheaprel;
5960
GISTSTATE*giststate;
6061

6162
int64indtuples;/* number of tuples indexed */
@@ -122,6 +123,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
122123
intfillfactor;
123124

124125
buildstate.indexrel=index;
126+
buildstate.heaprel=heap;
125127
if (index->rd_options)
126128
{
127129
/* Get buffering mode from the options string */
@@ -484,7 +486,7 @@ gistBuildCallback(Relation index,
484486
* locked, we call gistdoinsert directly.
485487
*/
486488
gistdoinsert(index,itup,buildstate->freespace,
487-
buildstate->giststate);
489+
buildstate->giststate,buildstate->heaprel);
488490
}
489491

490492
/* Update tuple count and total size. */
@@ -690,7 +692,8 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
690692
itup,ntup,oldoffnum,&placed_to_blk,
691693
InvalidBuffer,
692694
&splitinfo,
693-
false);
695+
false,
696+
buildstate->heaprel);
694697

695698
/*
696699
* If this is a root split, update the root path item kept in memory. This

‎src/backend/access/gist/gistxlog.c

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,12 @@
1616
#include"access/bufmask.h"
1717
#include"access/gist_private.h"
1818
#include"access/gistxlog.h"
19+
#include"access/heapam_xlog.h"
20+
#include"access/transam.h"
1921
#include"access/xloginsert.h"
2022
#include"access/xlogutils.h"
23+
#include"miscadmin.h"
24+
#include"storage/procarray.h"
2125
#include"utils/memutils.h"
2226

2327
staticMemoryContextopCtx;/* working memory for operations */
@@ -160,6 +164,210 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
160164
UnlockReleaseBuffer(buffer);
161165
}
162166

167+
/*
168+
* Get the latestRemovedXid from the heap pages pointed at by the index
169+
* tuples being deleted. See also btree_xlog_delete_get_latestRemovedXid,
170+
* on which this function is based.
171+
*/
172+
staticTransactionId
173+
gistRedoDeleteRecordGetLatestRemovedXid(XLogReaderState*record)
174+
{
175+
gistxlogDelete*xlrec= (gistxlogDelete*)XLogRecGetData(record);
176+
OffsetNumber*todelete;
177+
Bufferibuffer,
178+
hbuffer;
179+
Pageipage,
180+
hpage;
181+
RelFileNodernode;
182+
BlockNumberblkno;
183+
ItemIdiitemid,
184+
hitemid;
185+
IndexTupleitup;
186+
HeapTupleHeaderhtuphdr;
187+
BlockNumberhblkno;
188+
OffsetNumberhoffnum;
189+
TransactionIdlatestRemovedXid=InvalidTransactionId;
190+
inti;
191+
192+
/*
193+
* If there's nothing running on the standby we don't need to derive a
194+
* full latestRemovedXid value, so use a fast path out of here. This
195+
* returns InvalidTransactionId, and so will conflict with all HS
196+
* transactions; but since we just worked out that that's zero people,
197+
* it's OK.
198+
*
199+
* XXX There is a race condition here, which is that a new backend might
200+
* start just after we look. If so, it cannot need to conflict, but this
201+
* coding will result in throwing a conflict anyway.
202+
*/
203+
if (CountDBBackends(InvalidOid)==0)
204+
returnlatestRemovedXid;
205+
206+
/*
207+
* In what follows, we have to examine the previous state of the index
208+
* page, as well as the heap page(s) it points to. This is only valid if
209+
* WAL replay has reached a consistent database state; which means that
210+
* the preceding check is not just an optimization, but is *necessary*. We
211+
* won't have let in any user sessions before we reach consistency.
212+
*/
213+
if (!reachedConsistency)
214+
elog(PANIC,"gistRedoDeleteRecordGetLatestRemovedXid: cannot operate with inconsistent data");
215+
216+
/*
217+
* Get index page. If the DB is consistent, this should not fail, nor
218+
* should any of the heap page fetches below. If one does, we return
219+
* InvalidTransactionId to cancel all HS transactions. That's probably
220+
* overkill, but it's safe, and certainly better than panicking here.
221+
*/
222+
XLogRecGetBlockTag(record,0,&rnode,NULL,&blkno);
223+
ibuffer=XLogReadBufferExtended(rnode,MAIN_FORKNUM,blkno,RBM_NORMAL);
224+
if (!BufferIsValid(ibuffer))
225+
returnInvalidTransactionId;
226+
LockBuffer(ibuffer,BUFFER_LOCK_EXCLUSIVE);
227+
ipage= (Page)BufferGetPage(ibuffer);
228+
229+
/*
230+
* Loop through the deleted index items to obtain the TransactionId from
231+
* the heap items they point to.
232+
*/
233+
todelete= (OffsetNumber*) ((char*)xlrec+SizeOfGistxlogDelete);
234+
235+
for (i=0;i<xlrec->ntodelete;i++)
236+
{
237+
/*
238+
* Identify the index tuple about to be deleted
239+
*/
240+
iitemid=PageGetItemId(ipage,todelete[i]);
241+
itup= (IndexTuple)PageGetItem(ipage,iitemid);
242+
243+
/*
244+
* Locate the heap page that the index tuple points at
245+
*/
246+
hblkno=ItemPointerGetBlockNumber(&(itup->t_tid));
247+
hbuffer=XLogReadBufferExtended(xlrec->hnode,MAIN_FORKNUM,hblkno,RBM_NORMAL);
248+
if (!BufferIsValid(hbuffer))
249+
{
250+
UnlockReleaseBuffer(ibuffer);
251+
returnInvalidTransactionId;
252+
}
253+
LockBuffer(hbuffer,BUFFER_LOCK_SHARE);
254+
hpage= (Page)BufferGetPage(hbuffer);
255+
256+
/*
257+
* Look up the heap tuple header that the index tuple points at by
258+
* using the heap node supplied with the xlrec. We can't use
259+
* heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
260+
* Note that we are not looking at tuple data here, just headers.
261+
*/
262+
hoffnum=ItemPointerGetOffsetNumber(&(itup->t_tid));
263+
hitemid=PageGetItemId(hpage,hoffnum);
264+
265+
/*
266+
* Follow any redirections until we find something useful.
267+
*/
268+
while (ItemIdIsRedirected(hitemid))
269+
{
270+
hoffnum=ItemIdGetRedirect(hitemid);
271+
hitemid=PageGetItemId(hpage,hoffnum);
272+
CHECK_FOR_INTERRUPTS();
273+
}
274+
275+
/*
276+
* If the heap item has storage, then read the header and use that to
277+
* set latestRemovedXid.
278+
*
279+
* Some LP_DEAD items may not be accessible, so we ignore them.
280+
*/
281+
if (ItemIdHasStorage(hitemid))
282+
{
283+
htuphdr= (HeapTupleHeader)PageGetItem(hpage,hitemid);
284+
285+
HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr,&latestRemovedXid);
286+
}
287+
elseif (ItemIdIsDead(hitemid))
288+
{
289+
/*
290+
* Conjecture: if hitemid is dead then it had xids before the xids
291+
* marked on LP_NORMAL items. So we just ignore this item and move
292+
* onto the next, for the purposes of calculating
293+
* latestRemovedxids.
294+
*/
295+
}
296+
else
297+
Assert(!ItemIdIsUsed(hitemid));
298+
299+
UnlockReleaseBuffer(hbuffer);
300+
}
301+
302+
UnlockReleaseBuffer(ibuffer);
303+
304+
/*
305+
* If all heap tuples were LP_DEAD then we will be returning
306+
* InvalidTransactionId here, which avoids conflicts. This matches
307+
* existing logic which assumes that LP_DEAD tuples must already be older
308+
* than the latestRemovedXid on the cleanup record that set them as
309+
* LP_DEAD, hence must already have generated a conflict.
310+
*/
311+
returnlatestRemovedXid;
312+
}
313+
314+
/*
315+
* redo delete on gist index page to remove tuples marked as DEAD during index
316+
* tuple insertion
317+
*/
318+
staticvoid
319+
gistRedoDeleteRecord(XLogReaderState*record)
320+
{
321+
XLogRecPtrlsn=record->EndRecPtr;
322+
gistxlogDelete*xldata= (gistxlogDelete*)XLogRecGetData(record);
323+
Bufferbuffer;
324+
Pagepage;
325+
326+
/*
327+
* If we have any conflict processing to do, it must happen before we
328+
* update the page.
329+
*
330+
* GiST delete records can conflict with standby queries. You might think
331+
* that vacuum records would conflict as well, but we've handled that
332+
* already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
333+
* cleaned by the vacuum of the heap and so we can resolve any conflicts
334+
* just once when that arrives. After that we know that no conflicts
335+
* exist from individual gist vacuum records on that index.
336+
*/
337+
if (InHotStandby)
338+
{
339+
TransactionIdlatestRemovedXid=gistRedoDeleteRecordGetLatestRemovedXid(record);
340+
RelFileNodernode;
341+
342+
XLogRecGetBlockTag(record,0,&rnode,NULL,NULL);
343+
344+
ResolveRecoveryConflictWithSnapshot(latestRemovedXid,rnode);
345+
}
346+
347+
if (XLogReadBufferForRedo(record,0,&buffer)==BLK_NEEDS_REDO)
348+
{
349+
page= (Page)BufferGetPage(buffer);
350+
351+
if (XLogRecGetDataLen(record)>SizeOfGistxlogDelete)
352+
{
353+
OffsetNumber*todelete;
354+
355+
todelete= (OffsetNumber*) ((char*)xldata+SizeOfGistxlogDelete);
356+
357+
PageIndexMultiDelete(page,todelete,xldata->ntodelete);
358+
}
359+
360+
GistClearPageHasGarbage(page);
361+
GistMarkTuplesDeleted(page);
362+
363+
PageSetLSN(page,lsn);
364+
MarkBufferDirty(buffer);
365+
}
366+
367+
if (BufferIsValid(buffer))
368+
UnlockReleaseBuffer(buffer);
369+
}
370+
163371
/*
164372
* Returns an array of index pointers.
165373
*/
@@ -318,6 +526,9 @@ gist_redo(XLogReaderState *record)
318526
caseXLOG_GIST_PAGE_UPDATE:
319527
gistRedoPageUpdateRecord(record);
320528
break;
529+
caseXLOG_GIST_DELETE:
530+
gistRedoDeleteRecord(record);
531+
break;
321532
caseXLOG_GIST_PAGE_SPLIT:
322533
gistRedoPageSplitRecord(record);
323534
break;
@@ -487,3 +698,35 @@ gistXLogUpdate(Buffer buffer,
487698

488699
returnrecptr;
489700
}
701+
702+
/*
703+
* Write XLOG record describing a delete of leaf index tuples marked as DEAD
704+
* during new tuple insertion. One may think that this case is already covered
705+
* by gistXLogUpdate(). But deletion of index tuples might conflict with
706+
* standby queries and needs special handling.
707+
*/
708+
XLogRecPtr
709+
gistXLogDelete(Bufferbuffer,OffsetNumber*todelete,intntodelete,
710+
RelFileNodehnode)
711+
{
712+
gistxlogDeletexlrec;
713+
XLogRecPtrrecptr;
714+
715+
xlrec.hnode=hnode;
716+
xlrec.ntodelete=ntodelete;
717+
718+
XLogBeginInsert();
719+
XLogRegisterData((char*)&xlrec,SizeOfGistxlogDelete);
720+
721+
/*
722+
* We need the target-offsets array whether or not we store the whole
723+
* buffer, to allow us to find the latestRemovedXid on a standby server.
724+
*/
725+
XLogRegisterData((char*)todelete,ntodelete*sizeof(OffsetNumber));
726+
727+
XLogRegisterBuffer(0,buffer,REGBUF_STANDARD);
728+
729+
recptr=XLogInsert(RM_GIST_ID,XLOG_GIST_DELETE);
730+
731+
returnrecptr;
732+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp