88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.97 2003/02/21 00:06:21 tgl Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.98 2003/02/22 00:45:03 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -280,12 +280,21 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
280280if (!_bt_isequal (itupdesc ,page ,P_HIKEY ,
281281natts ,itup_scankey ))
282282break ;
283- nblkno = opaque -> btpo_next ;
284- if (nbuf != InvalidBuffer )
285- _bt_relbuf (rel ,nbuf );
286- nbuf = _bt_getbuf (rel ,nblkno ,BT_READ );
287- page = BufferGetPage (nbuf );
288- opaque = (BTPageOpaque )PageGetSpecialPointer (page );
283+ /* Advance to next non-dead page --- there must be one */
284+ for (;;)
285+ {
286+ nblkno = opaque -> btpo_next ;
287+ if (nbuf != InvalidBuffer )
288+ _bt_relbuf (rel ,nbuf );
289+ nbuf = _bt_getbuf (rel ,nblkno ,BT_READ );
290+ page = BufferGetPage (nbuf );
291+ opaque = (BTPageOpaque )PageGetSpecialPointer (page );
292+ if (!P_IGNORE (opaque ))
293+ break ;
294+ if (P_RIGHTMOST (opaque ))
295+ elog (ERROR ,"_bt_check_unique: fell off the end of %s" ,
296+ RelationGetRelationName (rel ));
297+ }
289298maxoff = PageGetMaxOffsetNumber (page );
290299offset = P_FIRSTDATAKEY (opaque );
291300}
@@ -414,20 +423,34 @@ _bt_insertonpg(Relation rel,
414423_bt_compare (rel ,keysz ,scankey ,page ,P_HIKEY )== 0 &&
415424random ()> (MAX_RANDOM_VALUE /100 ))
416425{
417- /* step right one page */
418- BlockNumber rblkno = lpageop -> btpo_next ;
419- Buffer rbuf ;
420-
421426/*
422- * must write-lock next page before releasing write lock on
427+ * step right to next non-dead page
428+ *
429+ * must write-lock that page before releasing write lock on
423430 * current page; else someone else's _bt_check_unique scan
424- * could fail to see our insertion.
431+ * could fail to see our insertion. write locks on intermediate
432+ * dead pages won't do because we don't know when they will get
433+ * de-linked from the tree.
425434 */
426- rbuf = _bt_getbuf (rel ,rblkno ,BT_WRITE );
435+ Buffer rbuf = InvalidBuffer ;
436+
437+ for (;;)
438+ {
439+ BlockNumber rblkno = lpageop -> btpo_next ;
440+
441+ if (rbuf != InvalidBuffer )
442+ _bt_relbuf (rel ,rbuf );
443+ rbuf = _bt_getbuf (rel ,rblkno ,BT_WRITE );
444+ page = BufferGetPage (rbuf );
445+ lpageop = (BTPageOpaque )PageGetSpecialPointer (page );
446+ if (!P_IGNORE (lpageop ))
447+ break ;
448+ if (P_RIGHTMOST (lpageop ))
449+ elog (ERROR ,"_bt_insertonpg: fell off the end of %s" ,
450+ RelationGetRelationName (rel ));
451+ }
427452_bt_relbuf (rel ,buf );
428453buf = rbuf ;
429- page = BufferGetPage (buf );
430- lpageop = (BTPageOpaque )PageGetSpecialPointer (page );
431454movedright = true;
432455}
433456
@@ -633,8 +656,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
633656BTPageOpaque ropaque ,
634657lopaque ,
635658oopaque ;
636- Buffer sbuf = 0 ;
637- Page spage = 0 ;
659+ Buffer sbuf = InvalidBuffer ;
660+ Page spage = NULL ;
661+ BTPageOpaque sopaque = NULL ;
638662Size itemsz ;
639663ItemId itemid ;
640664BTItem item ;
@@ -792,6 +816,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
792816{
793817sbuf = _bt_getbuf (rel ,ropaque -> btpo_next ,BT_WRITE );
794818spage = BufferGetPage (sbuf );
819+ sopaque = (BTPageOpaque )PageGetSpecialPointer (spage );
820+ if (sopaque -> btpo_prev != ropaque -> btpo_prev )
821+ elog (PANIC ,"btree: right sibling's left-link doesn't match" );
795822}
796823
797824/*
@@ -802,6 +829,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
802829 */
803830START_CRIT_SECTION ();
804831
832+ if (!P_RIGHTMOST (ropaque ))
833+ sopaque -> btpo_prev = BufferGetBlockNumber (rbuf );
834+
805835/* XLOG stuff */
806836if (!rel -> rd_istemp )
807837{
@@ -847,10 +877,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
847877
848878if (!P_RIGHTMOST (ropaque ))
849879{
850- BTPageOpaque sopaque = (BTPageOpaque )PageGetSpecialPointer (spage );
851-
852- sopaque -> btpo_prev = BufferGetBlockNumber (rbuf );
853-
854880rdata [2 ].next = & (rdata [3 ]);
855881rdata [3 ].buffer = sbuf ;
856882rdata [3 ].data = NULL ;
@@ -1250,58 +1276,63 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
12501276Buffer buf ;
12511277Page page ;
12521278BTPageOpaque opaque ;
1253- OffsetNumber offnum ,
1254- minoff ,
1255- maxoff ;
1256- ItemId itemid ;
1257- BTItem item ;
12581279
12591280buf = _bt_getbuf (rel ,blkno ,access );
12601281page = BufferGetPage (buf );
12611282opaque = (BTPageOpaque )PageGetSpecialPointer (page );
1262- minoff = P_FIRSTDATAKEY (opaque );
1263- maxoff = PageGetMaxOffsetNumber (page );
12641283
1265- /*
1266- * start = InvalidOffsetNumber means "search the whole page".
1267- * We need this test anyway due to possibility that
1268- * page has a high key now when it didn't before.
1269- */
1270- if (start < minoff )
1271- start = minoff ;
1272-
1273- /*
1274- * These loops will check every item on the page --- but in an order
1275- * that's attuned to the probability of where it actually is. Scan
1276- * to the right first, then to the left.
1277- */
1278- for (offnum = start ;
1279- offnum <=maxoff ;
1280- offnum = OffsetNumberNext (offnum ))
1284+ if (!P_IGNORE (opaque ))
12811285{
1282- itemid = PageGetItemId (page ,offnum );
1283- item = (BTItem )PageGetItem (page ,itemid );
1284- if (BTItemSame (item ,& stack -> bts_btitem ))
1286+ OffsetNumber offnum ,
1287+ minoff ,
1288+ maxoff ;
1289+ ItemId itemid ;
1290+ BTItem item ;
1291+
1292+ minoff = P_FIRSTDATAKEY (opaque );
1293+ maxoff = PageGetMaxOffsetNumber (page );
1294+
1295+ /*
1296+ * start = InvalidOffsetNumber means "search the whole page".
1297+ * We need this test anyway due to possibility that
1298+ * page has a high key now when it didn't before.
1299+ */
1300+ if (start < minoff )
1301+ start = minoff ;
1302+
1303+ /*
1304+ * These loops will check every item on the page --- but in an
1305+ * order that's attuned to the probability of where it actually
1306+ * is. Scan to the right first, then to the left.
1307+ */
1308+ for (offnum = start ;
1309+ offnum <=maxoff ;
1310+ offnum = OffsetNumberNext (offnum ))
12851311{
1286- /* Return accurate pointer to where link is now */
1287- stack -> bts_blkno = blkno ;
1288- stack -> bts_offset = offnum ;
1289- return buf ;
1312+ itemid = PageGetItemId (page ,offnum );
1313+ item = (BTItem )PageGetItem (page ,itemid );
1314+ if (BTItemSame (item ,& stack -> bts_btitem ))
1315+ {
1316+ /* Return accurate pointer to where link is now */
1317+ stack -> bts_blkno = blkno ;
1318+ stack -> bts_offset = offnum ;
1319+ return buf ;
1320+ }
12901321}
1291- }
12921322
1293- for (offnum = OffsetNumberPrev (start );
1294- offnum >=minoff ;
1295- offnum = OffsetNumberPrev (offnum ))
1296- {
1297- itemid = PageGetItemId (page ,offnum );
1298- item = (BTItem )PageGetItem (page ,itemid );
1299- if (BTItemSame (item ,& stack -> bts_btitem ))
1323+ for (offnum = OffsetNumberPrev (start );
1324+ offnum >=minoff ;
1325+ offnum = OffsetNumberPrev (offnum ))
13001326{
1301- /* Return accurate pointer to where link is now */
1302- stack -> bts_blkno = blkno ;
1303- stack -> bts_offset = offnum ;
1304- return buf ;
1327+ itemid = PageGetItemId (page ,offnum );
1328+ item = (BTItem )PageGetItem (page ,itemid );
1329+ if (BTItemSame (item ,& stack -> bts_btitem ))
1330+ {
1331+ /* Return accurate pointer to where link is now */
1332+ stack -> bts_blkno = blkno ;
1333+ stack -> bts_offset = offnum ;
1334+ return buf ;
1335+ }
13051336}
13061337}
13071338
@@ -1365,6 +1396,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
13651396rootbuf = _bt_getbuf (rel ,P_NEW ,BT_WRITE );
13661397rootpage = BufferGetPage (rootbuf );
13671398rootblknum = BufferGetBlockNumber (rootbuf );
1399+
1400+ /* acquire lock on the metapage */
13681401metabuf = _bt_getbuf (rel ,BTREE_METAPAGE ,BT_WRITE );
13691402metapg = BufferGetPage (metabuf );
13701403metad = BTPageGetMeta (metapg );