Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2095206

Browse files
committed
Adjust btree index build to not use shared buffers, thereby avoiding the
locking conflict against concurrent CHECKPOINT that was discussed a few weeks ago. Also, if not using WAL archiving (which is always true ATM but won't be if PITR makes it into this release), there's no need to WAL-log the index build process; it's sufficient to force-fsync the completed index before commit. This seems to gain about a factor of 2 in my tests, which is consistent with writing half as much data. I did not try it with WAL on a separate drive though --- probably the gain would be a lot less in that scenario.
1 parent 4d0e47d commit 2095206

File tree

8 files changed

+304
-214
lines changed

8 files changed

+304
-214
lines changed

‎src/backend/access/nbtree/nbtpage.c

Lines changed: 32 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.75 2004/04/21 18:24:25 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.76 2004/06/02 17:28:17 tgl Exp $
1313
*
1414
*NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -31,21 +31,21 @@
3131
/*
3232
*_bt_metapinit() -- Initialize the metadata page of a new btree.
3333
*
34-
* If markvalid is true, the index is immediately marked valid, else it
35-
* will be invalid until _bt_metaproot() is called.
34+
* Note: this is actually not used for standard btree index building;
35+
* nbtsort.c prefers not to make the metadata page valid until completion
36+
* of build.
3637
*
3738
* Note: there's no real need for any locking here. Since the transaction
3839
* creating the index hasn't committed yet, no one else can even see the index
3940
* much less be trying to use it. (In a REINDEX-in-place scenario, that's
4041
* not true, but we assume the caller holds sufficient locks on the index.)
4142
*/
4243
void
43-
_bt_metapinit(Relationrel,boolmarkvalid)
44+
_bt_metapinit(Relationrel)
4445
{
4546
Bufferbuf;
4647
Pagepg;
4748
BTMetaPageData*metad;
48-
BTPageOpaqueop;
4949

5050
if (RelationGetNumberOfBlocks(rel)!=0)
5151
elog(ERROR,"cannot initialize non-empty btree index \"%s\"",
@@ -55,21 +55,11 @@ _bt_metapinit(Relation rel, bool markvalid)
5555
Assert(BufferGetBlockNumber(buf)==BTREE_METAPAGE);
5656
pg=BufferGetPage(buf);
5757

58-
/* NO ELOG(ERROR) from here till newmeta op is logged */
59-
START_CRIT_SECTION();
60-
61-
_bt_pageinit(pg,BufferGetPageSize(buf));
62-
58+
_bt_initmetapage(pg,P_NONE,0);
6359
metad=BTPageGetMeta(pg);
64-
metad->btm_magic=markvalid ?BTREE_MAGIC :0;
65-
metad->btm_version=BTREE_VERSION;
66-
metad->btm_root=P_NONE;
67-
metad->btm_level=0;
68-
metad->btm_fastroot=P_NONE;
69-
metad->btm_fastlevel=0;
7060

71-
op= (BTPageOpaque)PageGetSpecialPointer(pg);
72-
op->btpo_flags=BTP_META;
61+
/* NO ELOG(ERROR) from here till newmeta op is logged */
62+
START_CRIT_SECTION();
7363

7464
/* XLOG stuff */
7565
if (!rel->rd_istemp)
@@ -90,7 +80,7 @@ _bt_metapinit(Relation rel, bool markvalid)
9080
rdata[0].next=NULL;
9181

9282
recptr=XLogInsert(RM_BTREE_ID,
93-
markvalid ?XLOG_BTREE_NEWMETA :XLOG_BTREE_INVALIDMETA,
83+
XLOG_BTREE_NEWMETA,
9484
rdata);
9585

9686
PageSetLSN(pg,recptr);
@@ -102,6 +92,29 @@ _bt_metapinit(Relation rel, bool markvalid)
10292
WriteBuffer(buf);
10393
}
10494

95+
/*
96+
*_bt_initmetapage() -- Fill a page buffer with a correct metapage image
97+
*/
98+
void
99+
_bt_initmetapage(Pagepage,BlockNumberrootbknum,uint32level)
100+
{
101+
BTMetaPageData*metad;
102+
BTPageOpaquemetaopaque;
103+
104+
_bt_pageinit(page,BLCKSZ);
105+
106+
metad=BTPageGetMeta(page);
107+
metad->btm_magic=BTREE_MAGIC;
108+
metad->btm_version=BTREE_VERSION;
109+
metad->btm_root=rootbknum;
110+
metad->btm_level=level;
111+
metad->btm_fastroot=rootbknum;
112+
metad->btm_fastlevel=level;
113+
114+
metaopaque= (BTPageOpaque)PageGetSpecialPointer(page);
115+
metaopaque->btpo_flags=BTP_META;
116+
}
117+
105118
/*
106119
*_bt_getroot() -- Get the root page of the btree.
107120
*
@@ -609,76 +622,6 @@ _bt_page_recyclable(Page page)
609622
return false;
610623
}
611624

612-
/*
613-
*_bt_metaproot() -- Change the root page of the btree.
614-
*
615-
*Lehman and Yao require that the root page move around in order to
616-
*guarantee deadlock-free short-term, fine-granularity locking. When
617-
*we split the root page, we record the new parent in the metadata page
618-
*for the relation. This routine does the work.
619-
*
620-
*No direct preconditions, but if you don't have the write lock on
621-
*at least the old root page when you call this, you're making a big
622-
*mistake. On exit, metapage data is correct and we no longer have
623-
*a pin or lock on the metapage.
624-
*
625-
* Actually this is not used for splitting on-the-fly anymore. It's only used
626-
* in nbtsort.c at the completion of btree building, where we know we have
627-
* sole access to the index anyway.
628-
*/
629-
void
630-
_bt_metaproot(Relationrel,BlockNumberrootbknum,uint32level)
631-
{
632-
Buffermetabuf;
633-
Pagemetap;
634-
BTPageOpaquemetaopaque;
635-
BTMetaPageData*metad;
636-
637-
metabuf=_bt_getbuf(rel,BTREE_METAPAGE,BT_WRITE);
638-
metap=BufferGetPage(metabuf);
639-
metaopaque= (BTPageOpaque)PageGetSpecialPointer(metap);
640-
Assert(metaopaque->btpo_flags&BTP_META);
641-
642-
/* NO ELOG(ERROR) from here till newmeta op is logged */
643-
START_CRIT_SECTION();
644-
645-
metad=BTPageGetMeta(metap);
646-
Assert(metad->btm_magic==BTREE_MAGIC||metad->btm_magic==0);
647-
metad->btm_magic=BTREE_MAGIC;/* it's valid now for sure */
648-
metad->btm_root=rootbknum;
649-
metad->btm_level=level;
650-
metad->btm_fastroot=rootbknum;
651-
metad->btm_fastlevel=level;
652-
653-
/* XLOG stuff */
654-
if (!rel->rd_istemp)
655-
{
656-
xl_btree_newmetaxlrec;
657-
XLogRecPtrrecptr;
658-
XLogRecDatardata[1];
659-
660-
xlrec.node=rel->rd_node;
661-
xlrec.meta.root=metad->btm_root;
662-
xlrec.meta.level=metad->btm_level;
663-
xlrec.meta.fastroot=metad->btm_fastroot;
664-
xlrec.meta.fastlevel=metad->btm_fastlevel;
665-
666-
rdata[0].buffer=InvalidBuffer;
667-
rdata[0].data= (char*)&xlrec;
668-
rdata[0].len=SizeOfBtreeNewmeta;
669-
rdata[0].next=NULL;
670-
671-
recptr=XLogInsert(RM_BTREE_ID,XLOG_BTREE_NEWMETA,rdata);
672-
673-
PageSetLSN(metap,recptr);
674-
PageSetSUI(metap,ThisStartUpID);
675-
}
676-
677-
END_CRIT_SECTION();
678-
679-
_bt_wrtbuf(rel,metabuf);
680-
}
681-
682625
/*
683626
* Delete item(s) from a btree page.
684627
*

‎src/backend/access/nbtree/nbtree.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.116 2004/05/31 19:24:04 tgl Exp $
15+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.117 2004/06/02 17:28:17 tgl Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -112,10 +112,6 @@ btbuild(PG_FUNCTION_ARGS)
112112
elog(ERROR,"index \"%s\" already contains data",
113113
RelationGetRelationName(index));
114114

115-
/* initialize the btree index metadata page */
116-
/* mark it valid right away only if using slow build */
117-
_bt_metapinit(index, !buildstate.usefast);
118-
119115
if (buildstate.usefast)
120116
{
121117
buildstate.spool=_bt_spoolinit(index,indexInfo->ii_Unique, false);
@@ -127,6 +123,11 @@ btbuild(PG_FUNCTION_ARGS)
127123
if (indexInfo->ii_Unique)
128124
buildstate.spool2=_bt_spoolinit(index, false, true);
129125
}
126+
else
127+
{
128+
/* if using slow build, initialize the btree index metadata page */
129+
_bt_metapinit(index);
130+
}
130131

131132
/* do the heap scan */
132133
reltuples=IndexBuildHeapScan(heap,index,indexInfo,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp