1212 * Portions Copyright (c) 1994, Regents of the University of California
1313 *
1414 * IDENTIFICATION
15- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.85 2001/11/10 23:51:13 tgl Exp $
15+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.86 2001/11/23 23:41:54 tgl Exp $
1616 *
1717 *-------------------------------------------------------------------------
1818 */
@@ -592,6 +592,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
592592BlockNumber blkno ;
593593OffsetNumber offnum ;
594594BTItem btitem ;
595+ BTPageOpaque opaque ;
595596IndexTuple itup ;
596597ItemPointer htup ;
597598
@@ -608,9 +609,17 @@ btbulkdelete(PG_FUNCTION_ARGS)
608609/*
609610 * If this is first deletion on this page, trade in read
610611 * lock for a really-exclusive write lock. Then, step
611- * back one and re-examine the item, because someone else
612- * might have inserted an item while we weren't holding
612+ * back one and re-examine the item, because other backends
613+ * might have inserted item(s) while we weren't holding
613614 * the lock!
615+ *
616+ * We assume that only concurrent insertions, not deletions,
617+ * can occur while we're not holding the page lock (the caller
618+ * should hold a suitable relation lock to ensure this).
619+ * Therefore, the item we want to delete is either in the
620+ * same slot as before, or some slot to its right.
621+ * Rechecking the same slot is necessary and sufficient to
622+ * get back in sync after any insertions.
614623 */
615624if (blkno != lockedBlock )
616625{
@@ -620,7 +629,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
620629}
621630else
622631{
623- /* Delete the item from the page */
632+ /* Okay to delete the item from the page */
624633_bt_itemdel (rel ,buf ,current );
625634
626635/* Mark buffer dirty, but keep the lock and pin */
@@ -630,14 +639,23 @@ btbulkdelete(PG_FUNCTION_ARGS)
630639}
631640
632641/*
633- * We need to back up the scan one item so that the next
634- * cycle will re-examine the same offnum on this page.
642+ * In either case, we now need to back up the scan one item,
643+ * so that the next cycle will re-examine the same offnum on
644+ * this page.
635645 *
636646 * For now, just hack the current-item index. Will need to
637647 * be smarter when deletion includes removal of empty
638648 * index pages.
649+ *
650+ * We must decrement ip_posid in all cases but one: if the
651+ * page was formerly rightmost but was split while we didn't
652+ * hold the lock, and ip_posid is pointing to item 1, then
653+ * ip_posid now points at the high key, not a valid data item.
654+ * In this case we do want to step forward.
639655 */
640- current -> ip_posid -- ;
656+ opaque = (BTPageOpaque )PageGetSpecialPointer (page );
657+ if (current -> ip_posid >=P_FIRSTDATAKEY (opaque ))
658+ current -> ip_posid -- ;
641659}
642660else
643661num_index_tuples += 1 ;