88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.178 2010/03/28 09:27:01 sriggs Exp $
11+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.179 2010/08/29 19:33:14 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -74,9 +74,8 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
7474static void _bt_checksplitloc (FindSplitData * state ,
7575OffsetNumber firstoldonright ,bool newitemonleft ,
7676int dataitemstoleft ,Size firstoldonrightsz );
77- static void _bt_pgaddtup (Relation rel ,Page page ,
78- Size itemsize ,IndexTuple itup ,
79- OffsetNumber itup_off ,const char * where );
77+ static bool _bt_pgaddtup (Page page ,Size itemsize ,IndexTuple itup ,
78+ OffsetNumber itup_off );
8079static bool _bt_isequal (TupleDesc itupdesc ,Page page ,OffsetNumber offnum ,
8180int keysz ,ScanKey scankey );
8281static void _bt_vacuum_one_page (Relation rel ,Buffer buffer ,Relation heapRel );
@@ -753,7 +752,9 @@ _bt_insertonpg(Relation rel,
753752/* Do the update. No ereport(ERROR) until changes are logged */
754753START_CRIT_SECTION ();
755754
756- _bt_pgaddtup (rel ,page ,itemsz ,itup ,newitemoff ,"page" );
755+ if (!_bt_pgaddtup (page ,itemsz ,itup ,newitemoff ))
756+ elog (PANIC ,"failed to add new item to block %u in index \"%s\"" ,
757+ itup_blkno ,RelationGetRelationName (rel ));
757758
758759MarkBufferDirty (buf );
759760
@@ -879,6 +880,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
879880Page origpage ;
880881Page leftpage ,
881882rightpage ;
883+ BlockNumber origpagenumber ,
884+ rightpagenumber ;
882885BTPageOpaque ropaque ,
883886lopaque ,
884887oopaque ;
@@ -894,11 +897,27 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
894897OffsetNumber i ;
895898bool isroot ;
896899
900+ /* Acquire a new page to split into */
897901rbuf = _bt_getbuf (rel ,P_NEW ,BT_WRITE );
902+
903+ /*
904+ * origpage is the original page to be split. leftpage is a temporary
905+ * buffer that receives the left-sibling data, which will be copied back
906+ * into origpage on success. rightpage is the new page that receives
907+ * the right-sibling data. If we fail before reaching the critical
908+ * section, origpage hasn't been modified and leftpage is only workspace.
909+ * In principle we shouldn't need to worry about rightpage either,
910+ * because it hasn't been linked into the btree page structure; but to
911+ * avoid leaving possibly-confusing junk behind, we are careful to rewrite
912+ * rightpage as zeroes before throwing any error.
913+ */
898914origpage = BufferGetPage (buf );
899915leftpage = PageGetTempPage (origpage );
900916rightpage = BufferGetPage (rbuf );
901917
918+ origpagenumber = BufferGetBlockNumber (buf );
919+ rightpagenumber = BufferGetBlockNumber (rbuf );
920+
902921_bt_pageinit (leftpage ,BufferGetPageSize (buf ));
903922/* rightpage was already initialized by _bt_getbuf */
904923
@@ -923,8 +942,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
923942lopaque -> btpo_flags &= ~(BTP_ROOT |BTP_SPLIT_END |BTP_HAS_GARBAGE );
924943ropaque -> btpo_flags = lopaque -> btpo_flags ;
925944lopaque -> btpo_prev = oopaque -> btpo_prev ;
926- lopaque -> btpo_next = BufferGetBlockNumber ( rbuf ) ;
927- ropaque -> btpo_prev = BufferGetBlockNumber ( buf ) ;
945+ lopaque -> btpo_next = rightpagenumber ;
946+ ropaque -> btpo_prev = origpagenumber ;
928947ropaque -> btpo_next = oopaque -> btpo_next ;
929948lopaque -> btpo .level = ropaque -> btpo .level = oopaque -> btpo .level ;
930949/* Since we already have write-lock on both pages, ok to read cycleid */
@@ -947,9 +966,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
947966item = (IndexTuple )PageGetItem (origpage ,itemid );
948967if (PageAddItem (rightpage , (Item )item ,itemsz ,rightoff ,
949968false, false)== InvalidOffsetNumber )
950- elog (PANIC ,"failed to add hikey to the right sibling"
969+ {
970+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
971+ elog (ERROR ,"failed to add hikey to the right sibling"
951972" while splitting block %u of index \"%s\"" ,
952- BufferGetBlockNumber (buf ),RelationGetRelationName (rel ));
973+ origpagenumber ,RelationGetRelationName (rel ));
974+ }
953975rightoff = OffsetNumberNext (rightoff );
954976}
955977
@@ -974,9 +996,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
974996}
975997if (PageAddItem (leftpage , (Item )item ,itemsz ,leftoff ,
976998false, false)== InvalidOffsetNumber )
977- elog (PANIC ,"failed to add hikey to the left sibling"
999+ {
1000+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1001+ elog (ERROR ,"failed to add hikey to the left sibling"
9781002" while splitting block %u of index \"%s\"" ,
979- BufferGetBlockNumber (buf ),RelationGetRelationName (rel ));
1003+ origpagenumber ,RelationGetRelationName (rel ));
1004+ }
9801005leftoff = OffsetNumberNext (leftoff );
9811006
9821007/*
@@ -998,29 +1023,49 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
9981023{
9991024if (newitemonleft )
10001025{
1001- _bt_pgaddtup (rel ,leftpage ,newitemsz ,newitem ,leftoff ,
1002- "left sibling" );
1026+ if (!_bt_pgaddtup (leftpage ,newitemsz ,newitem ,leftoff ))
1027+ {
1028+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1029+ elog (ERROR ,"failed to add new item to the left sibling"
1030+ " while splitting block %u of index \"%s\"" ,
1031+ origpagenumber ,RelationGetRelationName (rel ));
1032+ }
10031033leftoff = OffsetNumberNext (leftoff );
10041034}
10051035else
10061036{
1007- _bt_pgaddtup (rel ,rightpage ,newitemsz ,newitem ,rightoff ,
1008- "right sibling" );
1037+ if (!_bt_pgaddtup (rightpage ,newitemsz ,newitem ,rightoff ))
1038+ {
1039+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1040+ elog (ERROR ,"failed to add new item to the right sibling"
1041+ " while splitting block %u of index \"%s\"" ,
1042+ origpagenumber ,RelationGetRelationName (rel ));
1043+ }
10091044rightoff = OffsetNumberNext (rightoff );
10101045}
10111046}
10121047
10131048/* decide which page to put it on */
10141049if (i < firstright )
10151050{
1016- _bt_pgaddtup (rel ,leftpage ,itemsz ,item ,leftoff ,
1017- "left sibling" );
1051+ if (!_bt_pgaddtup (leftpage ,itemsz ,item ,leftoff ))
1052+ {
1053+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1054+ elog (ERROR ,"failed to add old item to the left sibling"
1055+ " while splitting block %u of index \"%s\"" ,
1056+ origpagenumber ,RelationGetRelationName (rel ));
1057+ }
10181058leftoff = OffsetNumberNext (leftoff );
10191059}
10201060else
10211061{
1022- _bt_pgaddtup (rel ,rightpage ,itemsz ,item ,rightoff ,
1023- "right sibling" );
1062+ if (!_bt_pgaddtup (rightpage ,itemsz ,item ,rightoff ))
1063+ {
1064+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1065+ elog (ERROR ,"failed to add old item to the right sibling"
1066+ " while splitting block %u of index \"%s\"" ,
1067+ origpagenumber ,RelationGetRelationName (rel ));
1068+ }
10241069rightoff = OffsetNumberNext (rightoff );
10251070}
10261071}
@@ -1034,8 +1079,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10341079 * not be splitting the page).
10351080 */
10361081Assert (!newitemonleft );
1037- _bt_pgaddtup (rel ,rightpage ,newitemsz ,newitem ,rightoff ,
1038- "right sibling" );
1082+ if (!_bt_pgaddtup (rightpage ,newitemsz ,newitem ,rightoff ))
1083+ {
1084+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1085+ elog (ERROR ,"failed to add new item to the right sibling"
1086+ " while splitting block %u of index \"%s\"" ,
1087+ origpagenumber ,RelationGetRelationName (rel ));
1088+ }
10391089rightoff = OffsetNumberNext (rightoff );
10401090}
10411091
@@ -1047,16 +1097,19 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10471097 * neighbors.
10481098 */
10491099
1050- if (!P_RIGHTMOST (ropaque ))
1100+ if (!P_RIGHTMOST (oopaque ))
10511101{
1052- sbuf = _bt_getbuf (rel ,ropaque -> btpo_next ,BT_WRITE );
1102+ sbuf = _bt_getbuf (rel ,oopaque -> btpo_next ,BT_WRITE );
10531103spage = BufferGetPage (sbuf );
10541104sopaque = (BTPageOpaque )PageGetSpecialPointer (spage );
1055- if (sopaque -> btpo_prev != ropaque -> btpo_prev )
1056- elog (PANIC ,"right sibling's left-link doesn't match: "
1057- "block %u links to %u instead of expected %u in index \"%s\"" ,
1058- ropaque -> btpo_next ,sopaque -> btpo_prev ,ropaque -> btpo_prev ,
1105+ if (sopaque -> btpo_prev != origpagenumber )
1106+ {
1107+ memset (rightpage ,0 ,BufferGetPageSize (rbuf ));
1108+ elog (ERROR ,"right sibling's left-link doesn't match: "
1109+ "block %u links to %u instead of expected %u in index \"%s\"" ,
1110+ oopaque -> btpo_next ,sopaque -> btpo_prev ,origpagenumber ,
10591111RelationGetRelationName (rel ));
1112+ }
10601113
10611114/*
10621115 * Check to see if we can set the SPLIT_END flag in the right-hand
@@ -1081,8 +1134,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10811134 *
10821135 * NO EREPORT(ERROR) till right sibling is updated. We can get away with
10831136 * not starting the critical section till here because we haven't been
1084- * scribbling on the original page yet, and we don't care about the new
1085- * sibling until it's linked into the btree.
1137+ * scribbling on the original page yet; see comments above.
10861138 */
10871139START_CRIT_SECTION ();
10881140
@@ -1094,19 +1146,21 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10941146 * (in the page management code) that the center of a page always be
10951147 * clean, and the most efficient way to guarantee this is just to compact
10961148 * the data by reinserting it into a new left page. (XXX the latter
1097- * comment is probably obsolete.)
1149+ * comment is probably obsolete; but in any case it's good to not scribble
1150+ * on the original page until we enter the critical section.)
10981151 *
10991152 * We need to do this before writing the WAL record, so that XLogInsert
11001153 * can WAL log an image of the page if necessary.
11011154 */
11021155PageRestoreTempPage (leftpage ,origpage );
1156+ /* leftpage, lopaque must not be used below here */
11031157
11041158MarkBufferDirty (buf );
11051159MarkBufferDirty (rbuf );
11061160
11071161if (!P_RIGHTMOST (ropaque ))
11081162{
1109- sopaque -> btpo_prev = BufferGetBlockNumber ( rbuf ) ;
1163+ sopaque -> btpo_prev = rightpagenumber ;
11101164MarkBufferDirty (sbuf );
11111165}
11121166
@@ -1120,8 +1174,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
11201174XLogRecData * lastrdata ;
11211175
11221176xlrec .node = rel -> rd_node ;
1123- xlrec .leftsib = BufferGetBlockNumber ( buf ) ;
1124- xlrec .rightsib = BufferGetBlockNumber ( rbuf ) ;
1177+ xlrec .leftsib = origpagenumber ;
1178+ xlrec .rightsib = rightpagenumber ;
11251179xlrec .rnext = ropaque -> btpo_next ;
11261180xlrec .level = ropaque -> btpo .level ;
11271181xlrec .firstright = firstright ;
@@ -1920,13 +1974,11 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
19201974 * we insert the tuples in order, so that the given itup_off does
19211975 * represent the final position of the tuple!
19221976 */
1923- static void
1924- _bt_pgaddtup (Relation rel ,
1925- Page page ,
1977+ static bool
1978+ _bt_pgaddtup (Page page ,
19261979Size itemsize ,
19271980IndexTuple itup ,
1928- OffsetNumber itup_off ,
1929- const char * where )
1981+ OffsetNumber itup_off )
19301982{
19311983BTPageOpaque opaque = (BTPageOpaque )PageGetSpecialPointer (page );
19321984IndexTupleData trunctuple ;
@@ -1941,8 +1993,9 @@ _bt_pgaddtup(Relation rel,
19411993
19421994if (PageAddItem (page , (Item )itup ,itemsize ,itup_off ,
19431995false, false)== InvalidOffsetNumber )
1944- elog (PANIC ,"failed to add item to the %s in index \"%s\"" ,
1945- where ,RelationGetRelationName (rel ));
1996+ return false;
1997+
1998+ return true;
19461999}
19472000
19482001/*