88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.87 2001/10/25 05:49:21 momjian Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.88 2002/01/01 20:32:37 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -75,7 +75,6 @@ static void _bt_pgaddtup(Relation rel, Page page,
7575static bool _bt_isequal (TupleDesc itupdesc ,Page page ,OffsetNumber offnum ,
7676int keysz ,ScanKey scankey );
7777
78- static Relation _xlheapRel ;/* temporary hack */
7978
8079/*
8180 *_bt_doinsert() -- Handle insertion of a single btitem in the tree.
@@ -116,7 +115,21 @@ _bt_doinsert(Relation rel, BTItem btitem,
116115
117116/*
118117 * If we're not allowing duplicates, make sure the key isn't already
119- * in the index. XXX this belongs somewhere else, likely
118+ * in the index.
119+ *
120+ * NOTE: obviously, _bt_check_unique can only detect keys that are
121+ * already in the index; so it cannot defend against concurrent
122+ * insertions of the same key. We protect against that by means
123+ * of holding a write lock on the target page. Any other would-be
124+ * inserter of the same key must acquire a write lock on the same
125+ * target page, so only one would-be inserter can be making the check
126+ * at one time. Furthermore, once we are past the check we hold
127+ * write locks continuously until we have performed our insertion,
128+ * so no later inserter can fail to see our insertion. (This
129+ * requires some care in _bt_insertonpg.)
130+ *
131+ * If we must wait for another xact, we release the lock while waiting,
132+ * and then must start over completely.
120133 */
121134if (index_is_unique )
122135{
@@ -135,8 +148,6 @@ _bt_doinsert(Relation rel, BTItem btitem,
135148}
136149}
137150
138- _xlheapRel = heapRel ;/* temporary hack */
139-
140151/* do the insertion */
141152res = _bt_insertonpg (rel ,buf ,stack ,natts ,itup_scankey ,btitem ,0 );
142153
@@ -397,9 +408,16 @@ _bt_insertonpg(Relation rel,
397408{
398409/* step right one page */
399410BlockNumber rblkno = lpageop -> btpo_next ;
411+ Buffer rbuf ;
400412
413+ /*
414+ * must write-lock next page before releasing write lock on
415+ * current page; else someone else's _bt_check_unique scan
416+ * could fail to see our insertion.
417+ */
418+ rbuf = _bt_getbuf (rel ,rblkno ,BT_WRITE );
401419_bt_relbuf (rel ,buf );
402- buf = _bt_getbuf ( rel , rblkno , BT_WRITE ) ;
420+ buf = rbuf ;
403421page = BufferGetPage (buf );
404422lpageop = (BTPageOpaque )PageGetSpecialPointer (page );
405423movedright = true;
@@ -833,7 +851,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
833851 * page is not updated yet. Log changes before continuing.
834852 *
835853 * NO ELOG(ERROR) till right sibling is updated.
836- *
837854 */
838855START_CRIT_SECTION ();
839856{