Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8af2565

Browse files
committed
Introduce a new smgr bulk loading facility.
The new facility makes it easier to optimize bulk loading, as the logic for buffering, WAL-logging, and syncing the relation only needs to be implemented once. It's also less error-prone: We have had a number of bugs in how a relation is fsync'd - or not - at the end of a bulk loading operation. By centralizing that logic to one place, we only need to write it correctly once.

The new facility is faster for small relations: Instead of calling smgrimmedsync(), we register the fsync to happen at next checkpoint, which avoids the fsync latency. That can make a big difference if you are e.g. restoring a schema-only dump with lots of relations.

It is also slightly more efficient with large relations, as the WAL logging is performed multiple pages at a time. That avoids some WAL header overhead. The sorted GiST index build did that already, this moves the buffering to the new facility.

The changes to the pageinspect GiST test need an explanation: Before this patch, the sorted GiST index build set the LSN on every page to the special GistBuildLSN value, not the LSN of the WAL record, even though they were WAL-logged. There was no particular need for it, it just happened naturally when we wrote out the pages before WAL-logging them. Now we WAL-log the pages first, like in B-tree build, so the pages are stamped with the record's real LSN. When the build is not WAL-logged, we still use GistBuildLSN. To make the test output predictable, use an unlogged index.

Reviewed-by: Andres Freund
Discussion: https://www.postgresql.org/message-id/30e8f366-58b3-b239-c521-422122dd5150%40iki.fi
1 parent e612384, commit 8af2565

File tree

17 files changed

+552
-355
lines changed

17 files changed

+552
-355
lines changed

‎contrib/pageinspect/expected/gist.out

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
-- The gist_page_opaque_info() function prints the page's LSN. Normally,
2-
-- that's constant 1 (GistBuildLSN) on every page of a freshly built GiST
3-
-- index. But with wal_level=minimal, the whole relation is dumped to WAL at
4-
-- the end of the transaction if it's smaller than wal_skip_threshold, which
5-
-- updates the LSNs. Wrap the tests on gist_page_opaque_info() in the
6-
-- same transaction with the CREATE INDEX so that we see the LSNs before
7-
-- they are possibly overwritten at end of transaction.
8-
BEGIN;
9-
-- Create a test table and GiST index.
10-
CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
1+
-- The gist_page_opaque_info() function prints the page's LSN.
2+
-- Use an unlogged index, so that the LSN is predictable.
3+
CREATE UNLOGGED TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
114
generate_series(1,1000) i;
125
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
136
-- Page 0 is the root, the rest are leaf pages
@@ -29,7 +22,6 @@ SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
2922
0/1 | 0/0 | 1 | {leaf}
3023
(1 row)
3124

32-
COMMIT;
3325
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
3426
itemoffset | ctid | itemlen | dead | keys
3527
------------+-----------+---------+------+-------------------------------

‎contrib/pageinspect/sql/gist.sql

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
1-
-- The gist_page_opaque_info() function prints the page's LSN. Normally,
2-
-- that's constant 1 (GistBuildLSN) on every page of a freshly built GiST
3-
-- index. But with wal_level=minimal, the whole relation is dumped to WAL at
4-
-- the end of the transaction if it's smaller than wal_skip_threshold, which
5-
-- updates the LSNs. Wrap the tests on gist_page_opaque_info() in the
6-
-- same transaction with the CREATE INDEX so that we see the LSNs before
7-
-- they are possibly overwritten at end of transaction.
8-
BEGIN;
9-
10-
-- Create a test table and GiST index.
11-
CREATETABLEtest_gistASSELECTpoint(i,i) p, i::text tFROM
1+
-- The gist_page_opaque_info() function prints the page's LSN.
2+
-- Use an unlogged index, so that the LSN is predictable.
3+
CREATE UNLOGGED TABLE test_gistASSELECTpoint(i,i) p, i::text tFROM
124
generate_series(1,1000) i;
135
CREATEINDEXtest_gist_idxON test_gist USING gist (p);
146

@@ -17,8 +9,6 @@ SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
179
SELECT*FROM gist_page_opaque_info(get_raw_page('test_gist_idx',1));
1810
SELECT*FROM gist_page_opaque_info(get_raw_page('test_gist_idx',2));
1911

20-
COMMIT;
21-
2212
SELECT*FROM gist_page_items(get_raw_page('test_gist_idx',0),'test_gist_idx');
2313
SELECT*FROM gist_page_items(get_raw_page('test_gist_idx',1),'test_gist_idx')LIMIT5;
2414

‎src/backend/access/gist/gistbuild.c

Lines changed: 28 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
#include"miscadmin.h"
4444
#include"optimizer/optimizer.h"
4545
#include"storage/bufmgr.h"
46-
#include"storage/smgr.h"
46+
#include"storage/bulk_write.h"
47+
4748
#include"utils/memutils.h"
4849
#include"utils/rel.h"
4950
#include"utils/tuplesort.h"
@@ -106,11 +107,8 @@ typedef struct
106107
Tuplesortstate*sortstate;/* state data for tuplesort.c */
107108

108109
BlockNumberpages_allocated;
109-
BlockNumberpages_written;
110110

111-
intready_num_pages;
112-
BlockNumberready_blknos[XLR_MAX_BLOCK_ID];
113-
Pageready_pages[XLR_MAX_BLOCK_ID];
111+
BulkWriteState*bulkstate;
114112
}GISTBuildState;
115113

116114
#defineGIST_SORTED_BUILD_PAGE_NUM 4
@@ -142,7 +140,6 @@ static void gist_indexsortbuild_levelstate_add(GISTBuildState *state,
142140
IndexTupleitup);
143141
staticvoidgist_indexsortbuild_levelstate_flush(GISTBuildState*state,
144142
GistSortedBuildLevelState*levelstate);
145-
staticvoidgist_indexsortbuild_flush_ready_pages(GISTBuildState*state);
146143

147144
staticvoidgistInitBuffering(GISTBuildState*buildstate);
148145
staticintcalculatePagesPerBuffer(GISTBuildState*buildstate,intlevelStep);
@@ -405,27 +402,18 @@ gist_indexsortbuild(GISTBuildState *state)
405402
{
406403
IndexTupleitup;
407404
GistSortedBuildLevelState*levelstate;
408-
Pagepage;
405+
BulkWriteBufferrootbuf;
409406

410-
state->pages_allocated=0;
411-
state->pages_written=0;
412-
state->ready_num_pages=0;
407+
/* Reserve block 0 for the root page */
408+
state->pages_allocated=1;
413409

414-
/*
415-
* Write an empty page as a placeholder for the root page. It will be
416-
* replaced with the real root page at the end.
417-
*/
418-
page=palloc_aligned(BLCKSZ,PG_IO_ALIGN_SIZE,MCXT_ALLOC_ZERO);
419-
smgrextend(RelationGetSmgr(state->indexrel),MAIN_FORKNUM,GIST_ROOT_BLKNO,
420-
page, true);
421-
state->pages_allocated++;
422-
state->pages_written++;
410+
state->bulkstate=smgr_bulk_start_rel(state->indexrel,MAIN_FORKNUM);
423411

424412
/* Allocate a temporary buffer for the first leaf page batch. */
425413
levelstate=palloc0(sizeof(GistSortedBuildLevelState));
426-
levelstate->pages[0]=page;
414+
levelstate->pages[0]=palloc(BLCKSZ);
427415
levelstate->parent=NULL;
428-
gistinitpage(page,F_LEAF);
416+
gistinitpage(levelstate->pages[0],F_LEAF);
429417

430418
/*
431419
* Fill index pages with tuples in the sorted order.
@@ -455,31 +443,15 @@ gist_indexsortbuild(GISTBuildState *state)
455443
levelstate=parent;
456444
}
457445

458-
gist_indexsortbuild_flush_ready_pages(state);
459-
460446
/* Write out the root */
461447
PageSetLSN(levelstate->pages[0],GistBuildLSN);
462-
PageSetChecksumInplace(levelstate->pages[0],GIST_ROOT_BLKNO);
463-
smgrwrite(RelationGetSmgr(state->indexrel),MAIN_FORKNUM,GIST_ROOT_BLKNO,
464-
levelstate->pages[0], true);
465-
if (RelationNeedsWAL(state->indexrel))
466-
log_newpage(&state->indexrel->rd_locator,MAIN_FORKNUM,GIST_ROOT_BLKNO,
467-
levelstate->pages[0], true);
468-
469-
pfree(levelstate->pages[0]);
448+
rootbuf=smgr_bulk_get_buf(state->bulkstate);
449+
memcpy(rootbuf,levelstate->pages[0],BLCKSZ);
450+
smgr_bulk_write(state->bulkstate,GIST_ROOT_BLKNO,rootbuf, true);
451+
470452
pfree(levelstate);
471453

472-
/*
473-
* When we WAL-logged index pages, we must nonetheless fsync index files.
474-
* Since we're building outside shared buffers, a CHECKPOINT occurring
475-
* during the build has no way to flush the previously written data to
476-
* disk (indeed it won't know the index even exists). A crash later on
477-
* would replay WAL from the checkpoint, therefore it wouldn't replay our
478-
* earlier WAL entries. If we do not fsync those pages here, they might
479-
* still not be on disk when the crash occurs.
480-
*/
481-
if (RelationNeedsWAL(state->indexrel))
482-
smgrimmedsync(RelationGetSmgr(state->indexrel),MAIN_FORKNUM);
454+
smgr_bulk_finish(state->bulkstate);
483455
}
484456

485457
/*
@@ -509,8 +481,7 @@ gist_indexsortbuild_levelstate_add(GISTBuildState *state,
509481
levelstate->current_page++;
510482

511483
if (levelstate->pages[levelstate->current_page]==NULL)
512-
levelstate->pages[levelstate->current_page]=
513-
palloc_aligned(BLCKSZ,PG_IO_ALIGN_SIZE,0);
484+
levelstate->pages[levelstate->current_page]=palloc0(BLCKSZ);
514485

515486
newPage=levelstate->pages[levelstate->current_page];
516487
gistinitpage(newPage,old_page_flags);
@@ -573,14 +544,16 @@ gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
573544
for (;dist!=NULL;dist=dist->next)
574545
{
575546
char*data;
547+
BulkWriteBufferbuf;
576548
Pagetarget;
577549

578550
/* check once per page */
579551
CHECK_FOR_INTERRUPTS();
580552

581553
/* Create page and copy data */
582554
data= (char*) (dist->list);
583-
target=palloc_aligned(BLCKSZ,PG_IO_ALIGN_SIZE,MCXT_ALLOC_ZERO);
555+
buf=smgr_bulk_get_buf(state->bulkstate);
556+
target= (Page)buf;
584557
gistinitpage(target,isleaf ?F_LEAF :0);
585558
for (inti=0;i<dist->block.num;i++)
586559
{
@@ -593,20 +566,6 @@ gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
593566
}
594567
union_tuple=dist->itup;
595568

596-
if (state->ready_num_pages==XLR_MAX_BLOCK_ID)
597-
gist_indexsortbuild_flush_ready_pages(state);
598-
599-
/*
600-
* The page is now complete. Assign a block number to it, and add it
601-
* to the list of finished pages. (We don't write it out immediately,
602-
* because we want to WAL-log the pages in batches.)
603-
*/
604-
blkno=state->pages_allocated++;
605-
state->ready_blknos[state->ready_num_pages]=blkno;
606-
state->ready_pages[state->ready_num_pages]=target;
607-
state->ready_num_pages++;
608-
ItemPointerSetBlockNumber(&(union_tuple->t_tid),blkno);
609-
610569
/*
611570
* Set the right link to point to the previous page. This is just for
612571
* debugging purposes: GiST only follows the right link if a page is
@@ -621,6 +580,15 @@ gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
621580
*/
622581
if (levelstate->last_blkno)
623582
GistPageGetOpaque(target)->rightlink=levelstate->last_blkno;
583+
584+
/*
585+
* The page is now complete. Assign a block number to it, and pass it
586+
* to the bulk writer.
587+
*/
588+
blkno=state->pages_allocated++;
589+
PageSetLSN(target,GistBuildLSN);
590+
smgr_bulk_write(state->bulkstate,blkno,buf, true);
591+
ItemPointerSetBlockNumber(&(union_tuple->t_tid),blkno);
624592
levelstate->last_blkno=blkno;
625593

626594
/*
@@ -631,7 +599,7 @@ gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
631599
if (parent==NULL)
632600
{
633601
parent=palloc0(sizeof(GistSortedBuildLevelState));
634-
parent->pages[0]=(Page)palloc_aligned(BLCKSZ,PG_IO_ALIGN_SIZE,0);
602+
parent->pages[0]=palloc(BLCKSZ);
635603
parent->parent=NULL;
636604
gistinitpage(parent->pages[0],0);
637605

@@ -641,39 +609,6 @@ gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
641609
}
642610
}
643611

644-
staticvoid
645-
gist_indexsortbuild_flush_ready_pages(GISTBuildState*state)
646-
{
647-
if (state->ready_num_pages==0)
648-
return;
649-
650-
for (inti=0;i<state->ready_num_pages;i++)
651-
{
652-
Pagepage=state->ready_pages[i];
653-
BlockNumberblkno=state->ready_blknos[i];
654-
655-
/* Currently, the blocks must be buffered in order. */
656-
if (blkno!=state->pages_written)
657-
elog(ERROR,"unexpected block number to flush GiST sorting build");
658-
659-
PageSetLSN(page,GistBuildLSN);
660-
PageSetChecksumInplace(page,blkno);
661-
smgrextend(RelationGetSmgr(state->indexrel),MAIN_FORKNUM,blkno,page,
662-
true);
663-
664-
state->pages_written++;
665-
}
666-
667-
if (RelationNeedsWAL(state->indexrel))
668-
log_newpages(&state->indexrel->rd_locator,MAIN_FORKNUM,state->ready_num_pages,
669-
state->ready_blknos,state->ready_pages, true);
670-
671-
for (inti=0;i<state->ready_num_pages;i++)
672-
pfree(state->ready_pages[i]);
673-
674-
state->ready_num_pages=0;
675-
}
676-
677612

678613
/*-------------------------------------------------------------------------
679614
* Routines for non-sorted build

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp