Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 631118f

Browse files
committed
Get rid of the post-recovery cleanup step of GIN page splits.
Replace it with an approach similar to what GiST uses: when a page is split, the left sibling is marked with a flag indicating that the parent hasn't been updated yet. When the parent is updated, the flag is cleared. If an insertion steps on a page with the flag set, it will finish the split before proceeding with the insertion. The post-recovery cleanup mechanism was never totally reliable, as insertion to the parent could fail, e.g. because of running out of memory or disk space, leaving the tree in an inconsistent state. This also divides the responsibility of WAL-logging more clearly between the generic ginbtree.c code, and the parts specific to entry and posting trees. There is now a common WAL record format for insertions and deletions, which is written by ginbtree.c, followed by tree-specific payload, which is returned by the placetopage- and split- callbacks.
1 parent ce5326e commit 631118f

File tree

9 files changed

+666
-556
lines changed

9 files changed

+666
-556
lines changed

‎src/backend/access/gin/ginbtree.c

Lines changed: 315 additions & 132 deletions
Large diffs are not rendered by default.

‎src/backend/access/gin/gindatapage.c

Lines changed: 28 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,7 @@ GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset)
227227
OffsetNumbermaxoff=GinPageGetOpaque(page)->maxoff;
228228
char*ptr;
229229

230+
Assert(ItemPointerIsValid(data));
230231
Assert(GinPageIsLeaf(page));
231232

232233
if (offset==InvalidOffsetNumber)
@@ -255,6 +256,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
255256
OffsetNumbermaxoff=GinPageGetOpaque(page)->maxoff;
256257
char*ptr;
257258

259+
Assert(PostingItemGetBlockNumber(data)!=InvalidBlockNumber);
258260
Assert(!GinPageIsLeaf(page));
259261

260262
if (offset==InvalidOffsetNumber)
@@ -338,11 +340,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
338340
XLogRecData**prdata)
339341
{
340342
Pagepage=BufferGetPage(buf);
341-
intcnt=0;
342-
343343
/* these must be static so they can be returned to caller */
344-
staticXLogRecDatardata[3];
345-
staticginxlogInsertdata;
344+
staticXLogRecDatardata[2];
346345

347346
/* quick exit if it doesn't fit */
348347
if (!dataIsEnoughSpace(btree,buf,off,insertdata))
@@ -359,45 +358,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
359358
PostingItemSetBlockNumber(pitem,updateblkno);
360359
}
361360

362-
data.updateBlkno=updateblkno;
363-
data.node=btree->index->rd_node;
364-
data.blkno=BufferGetBlockNumber(buf);
365-
data.offset=off;
366-
data.nitem=1;
367-
data.isDelete= FALSE;
368-
data.isData= TRUE;
369-
data.isLeaf=GinPageIsLeaf(page) ? TRUE : FALSE;
370-
371-
/*
372-
* Prevent full page write if child's split occurs. That is needed to
373-
* remove incomplete splits while replaying WAL
374-
*
375-
* data.updateBlkno contains new block number (of newly created right
376-
* page) for recently splited page.
377-
*/
378-
if (data.updateBlkno==InvalidBlockNumber)
379-
{
380-
rdata[0].buffer=buf;
381-
rdata[0].buffer_std= FALSE;
382-
rdata[0].data=NULL;
383-
rdata[0].len=0;
384-
rdata[0].next=&rdata[1];
385-
cnt++;
386-
}
387-
388-
rdata[cnt].buffer=InvalidBuffer;
389-
rdata[cnt].data= (char*)&data;
390-
rdata[cnt].len=sizeof(ginxlogInsert);
391-
rdata[cnt].next=&rdata[cnt+1];
392-
cnt++;
393-
394-
rdata[cnt].buffer=InvalidBuffer;
395-
/* data and len filled in below */
396-
rdata[cnt].next=NULL;
397-
398361
if (GinPageIsLeaf(page))
399362
{
400363
GinBtreeDataLeafInsertData*items=insertdata;
364+
staticginxlogInsertDataLeafdata;
401365
uint32savedPos=items->curitem;
402366

403367
if (GinPageRightMost(page)&&off>GinPageGetOpaque(page)->maxoff)
@@ -415,19 +379,29 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
415379
{
416380
GinDataPageAddItemPointer(page,items->items+items->curitem,off);
417381
items->curitem++;
382+
data.nitem=1;
418383
}
419384

420-
rdata[cnt].data= (char*)&items->items[savedPos];
421-
rdata[cnt].len=sizeof(ItemPointerData)*data.nitem;
385+
rdata[0].buffer=InvalidBuffer;
386+
rdata[0].data= (char*)&data;
387+
rdata[0].len= offsetof(ginxlogInsertDataLeaf,items);
388+
rdata[0].next=&rdata[1];
389+
390+
rdata[1].buffer=InvalidBuffer;
391+
rdata[1].data= (char*)&items->items[savedPos];
392+
rdata[1].len=sizeof(ItemPointerData)*data.nitem;
393+
rdata[1].next=NULL;
422394
}
423395
else
424396
{
425397
PostingItem*pitem=insertdata;
426398

427399
GinDataPageAddPostingItem(page,pitem,off);
428400

429-
rdata[cnt].data= (char*)pitem;
430-
rdata[cnt].len=sizeof(PostingItem);
401+
rdata[0].buffer=InvalidBuffer;
402+
rdata[0].data= (char*)pitem;
403+
rdata[0].len=sizeof(PostingItem);
404+
rdata[0].next=NULL;
431405
}
432406

433407
return true;
@@ -456,8 +430,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
456430
SizefreeSpace;
457431

458432
/* these must be static so they can be returned to caller */
459-
staticginxlogSplitdata;
460-
staticXLogRecDatardata[4];
433+
staticginxlogSplitDatadata;
434+
staticXLogRecDatardata[2];
461435
staticcharvector[2*BLCKSZ];
462436

463437
GinInitPage(rpage,GinPageGetOpaque(lpage)->flags,pageSize);
@@ -488,6 +462,7 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
488462

489463
if (isleaf&&GinPageRightMost(lpage)&&off>GinPageGetOpaque(lpage)->maxoff)
490464
{
465+
/* append new items to the end */
491466
GinBtreeDataLeafInsertData*items=insertdata;
492467

493468
while (items->curitem<items->nitem&&
@@ -566,25 +541,18 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
566541
bound=GinDataPageGetRightBound(rpage);
567542
*bound=oldbound;
568543

569-
data.node=btree->index->rd_node;
570-
data.rootBlkno=InvalidBlockNumber;
571-
data.lblkno=BufferGetBlockNumber(lbuf);
572-
data.rblkno=BufferGetBlockNumber(rbuf);
573544
data.separator=separator;
574545
data.nitem=maxoff;
575-
data.isData= TRUE;
576-
data.isLeaf=GinPageIsLeaf(lpage) ? TRUE : FALSE;
577-
data.isRootSplit= FALSE;
578546
data.rightbound=oldbound;
579547

580548
rdata[0].buffer=InvalidBuffer;
581549
rdata[0].data= (char*)&data;
582-
rdata[0].len=sizeof(ginxlogSplit);
550+
rdata[0].len=sizeof(ginxlogSplitData);
583551
rdata[0].next=&rdata[1];
584552

585553
rdata[1].buffer=InvalidBuffer;
586554
rdata[1].data=vector;
587-
rdata[1].len=MAXALIGN(maxoff*sizeofitem);
555+
rdata[1].len=maxoff*sizeofitem;
588556
rdata[1].next=NULL;
589557

590558
returnlpage;
@@ -610,21 +578,18 @@ dataPrepareDownlink(GinBtree btree, Buffer lbuf)
610578
* Also called from ginxlog, should not use btree
611579
*/
612580
void
613-
ginDataFillRoot(GinBtreebtree,Bufferroot,Bufferlbuf,Bufferrbuf)
581+
ginDataFillRoot(GinBtreebtree,Pageroot,BlockNumberlblkno,Pagelpage,BlockNumberrblkno,Pagerpage)
614582
{
615-
Pagepage=BufferGetPage(root),
616-
lpage=BufferGetPage(lbuf),
617-
rpage=BufferGetPage(rbuf);
618583
PostingItemli,
619584
ri;
620585

621586
li.key=*GinDataPageGetRightBound(lpage);
622-
PostingItemSetBlockNumber(&li,BufferGetBlockNumber(lbuf));
623-
GinDataPageAddPostingItem(page,&li,InvalidOffsetNumber);
587+
PostingItemSetBlockNumber(&li,lblkno);
588+
GinDataPageAddPostingItem(root,&li,InvalidOffsetNumber);
624589

625590
ri.key=*GinDataPageGetRightBound(rpage);
626-
PostingItemSetBlockNumber(&ri,BufferGetBlockNumber(rbuf));
627-
GinDataPageAddPostingItem(page,&ri,InvalidOffsetNumber);
591+
PostingItemSetBlockNumber(&ri,rblkno);
592+
GinDataPageAddPostingItem(root,&ri,InvalidOffsetNumber);
628593
}
629594

630595
/*

‎src/backend/access/gin/ginentrypage.c

Lines changed: 22 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -504,15 +504,14 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
504504

505505
/* these must be static so they can be returned to caller */
506506
staticXLogRecDatardata[3];
507-
staticginxlogInsertdata;
507+
staticginxlogInsertEntrydata;
508508

509509
/* quick exit if it doesn't fit */
510510
if (!entryIsEnoughSpace(btree,buf,off,insertData))
511511
return false;
512512

513513
*prdata=rdata;
514514
entryPreparePage(btree,page,off,insertData,updateblkno);
515-
data.updateBlkno=updateblkno;
516515

517516
placed=PageAddItem(page,
518517
(Item)insertData->entry,
@@ -522,34 +521,11 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
522521
elog(ERROR,"failed to add item to index page in \"%s\"",
523522
RelationGetRelationName(btree->index));
524523

525-
data.node=btree->index->rd_node;
526-
data.blkno=BufferGetBlockNumber(buf);
527-
data.offset=off;
528-
data.nitem=1;
529524
data.isDelete=insertData->isDelete;
530-
data.isData= false;
531-
data.isLeaf=GinPageIsLeaf(page) ? TRUE : FALSE;
532-
533-
/*
534-
* Prevent full page write if child's split occurs. That is needed to
535-
* remove incomplete splits while replaying WAL
536-
*
537-
* data.updateBlkno contains new block number (of newly created right
538-
* page) for recently splited page.
539-
*/
540-
if (data.updateBlkno==InvalidBlockNumber)
541-
{
542-
rdata[0].buffer=buf;
543-
rdata[0].buffer_std= TRUE;
544-
rdata[0].data=NULL;
545-
rdata[0].len=0;
546-
rdata[0].next=&rdata[1];
547-
cnt++;
548-
}
549525

550526
rdata[cnt].buffer=InvalidBuffer;
551527
rdata[cnt].data= (char*)&data;
552-
rdata[cnt].len=sizeof(ginxlogInsert);
528+
rdata[cnt].len=offsetof(ginxlogInsertEntry,tuple);
553529
rdata[cnt].next=&rdata[cnt+1];
554530
cnt++;
555531

@@ -577,6 +553,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
577553
maxoff,
578554
separator=InvalidOffsetNumber;
579555
Sizetotalsize=0;
556+
Sizetupstoresize;
580557
Sizelsize=0,
581558
size;
582559
char*ptr;
@@ -588,18 +565,18 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
588565

589566
/* these must be static so they can be returned to caller */
590567
staticXLogRecDatardata[2];
591-
staticginxlogSplitdata;
568+
staticginxlogSplitEntrydata;
592569
staticchartupstore[2*BLCKSZ];
593570

594571
*prdata=rdata;
595-
data.leftChildBlkno= (GinPageIsLeaf(lpage)) ?
596-
InvalidOffsetNumber :GinGetDownlink(insertData->entry);
597-
data.updateBlkno=updateblkno;
598572
entryPreparePage(btree,lpage,off,insertData,updateblkno);
599573

574+
/*
575+
* First, append all the existing tuples and the new tuple we're inserting
576+
* one after another in a temporary workspace.
577+
*/
600578
maxoff=PageGetMaxOffsetNumber(lpage);
601579
ptr=tupstore;
602-
603580
for (i=FirstOffsetNumber;i <=maxoff;i++)
604581
{
605582
if (i==off)
@@ -624,7 +601,12 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
624601
ptr+=size;
625602
totalsize+=size+sizeof(ItemIdData);
626603
}
604+
tupstoresize=ptr-tupstore;
627605

606+
/*
607+
* Initialize the left and right pages, and copy all the tuples back to
608+
* them.
609+
*/
628610
GinInitPage(rpage,GinPageGetOpaque(lpage)->flags,pageSize);
629611
GinInitPage(lpage,GinPageGetOpaque(rpage)->flags,pageSize);
630612

@@ -654,24 +636,17 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
654636
ptr+=MAXALIGN(IndexTupleSize(itup));
655637
}
656638

657-
data.node=btree->index->rd_node;
658-
data.rootBlkno=InvalidBlockNumber;
659-
data.lblkno=BufferGetBlockNumber(lbuf);
660-
data.rblkno=BufferGetBlockNumber(rbuf);
661639
data.separator=separator;
662640
data.nitem=maxoff;
663-
data.isData= FALSE;
664-
data.isLeaf=GinPageIsLeaf(lpage) ? TRUE : FALSE;
665-
data.isRootSplit= FALSE;
666641

667642
rdata[0].buffer=InvalidBuffer;
668643
rdata[0].data= (char*)&data;
669-
rdata[0].len=sizeof(ginxlogSplit);
644+
rdata[0].len=sizeof(ginxlogSplitEntry);
670645
rdata[0].next=&rdata[1];
671646

672647
rdata[1].buffer=InvalidBuffer;
673648
rdata[1].data=tupstore;
674-
rdata[1].len=MAXALIGN(totalsize);
649+
rdata[1].len=tupstoresize;
675650
rdata[1].next=NULL;
676651

677652
returnlpage;
@@ -702,24 +677,19 @@ entryPrepareDownlink(GinBtree btree, Buffer lbuf)
702677
* Also called from ginxlog, should not use btree
703678
*/
704679
void
705-
ginEntryFillRoot(GinBtreebtree,Bufferroot,Bufferlbuf,Bufferrbuf)
680+
ginEntryFillRoot(GinBtreebtree,Pageroot,
681+
BlockNumberlblkno,Pagelpage,
682+
BlockNumberrblkno,Pagerpage)
706683
{
707-
Pagepage=BufferGetPage(root);
708-
Pagelpage=BufferGetPage(lbuf);
709-
Pagerpage=BufferGetPage(rbuf);
710684
IndexTupleitup;
711685

712-
itup=GinFormInteriorTuple(getRightMostTuple(lpage),
713-
lpage,
714-
BufferGetBlockNumber(lbuf));
715-
if (PageAddItem(page, (Item)itup,IndexTupleSize(itup),InvalidOffsetNumber, false, false)==InvalidOffsetNumber)
686+
itup=GinFormInteriorTuple(getRightMostTuple(lpage),lpage,lblkno);
687+
if (PageAddItem(root, (Item)itup,IndexTupleSize(itup),InvalidOffsetNumber, false, false)==InvalidOffsetNumber)
716688
elog(ERROR,"failed to add item to index root page");
717689
pfree(itup);
718690

719-
itup=GinFormInteriorTuple(getRightMostTuple(rpage),
720-
rpage,
721-
BufferGetBlockNumber(rbuf));
722-
if (PageAddItem(page, (Item)itup,IndexTupleSize(itup),InvalidOffsetNumber, false, false)==InvalidOffsetNumber)
691+
itup=GinFormInteriorTuple(getRightMostTuple(rpage),rpage,rblkno);
692+
if (PageAddItem(root, (Item)itup,IndexTupleSize(itup),InvalidOffsetNumber, false, false)==InvalidOffsetNumber)
723693
elog(ERROR,"failed to add item to index root page");
724694
pfree(itup);
725695
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp