1818 * that is reading in or writing out a page buffer does not hold the control
1919 * lock, only the per-buffer lock for the buffer it is working on.
2020 *
21- * To change the page number or state of a buffer, one must normally hold
22- * the control lock. (The sole exception to this rule is that a writer
23- * process changes the state from DIRTY to WRITE_IN_PROGRESS while holding
24- * only the per-buffer lock.) If the buffer's state is neither EMPTY nor
21+ * To change the page number or state of a buffer, one must hold
22+ * the control lock. If the buffer's state is neither EMPTY nor
2523 * CLEAN, then there may be processes doing (or waiting to do) I/O on the
2624 * buffer, so the page number may not be changed, and the only allowed state
2725 * transition is to change WRITE_IN_PROGRESS to DIRTY after dirtying the page.
3533 * the read, while the early marking prevents someone else from trying to
3634 * read the same page into a different buffer.
3735 *
38- * Note we are assuming that read and write of the state value is atomic,
39- * since I/O processes may examine and change the state while not holding
40- * the control lock.
41- *
4236 * As with the regular buffer manager, it is possible for another process
4337 * to re-dirty a page that is currently being written out. This is handled
4438 * by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing
4842 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
4943 * Portions Copyright (c) 1994, Regents of the University of California
5044 *
51- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.28 2005/10/15 02:49:09 momjian Exp $
45+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.29 2005/11/03 00:23:36 tgl Exp $
5246 *
5347 *-------------------------------------------------------------------------
5448 */
@@ -283,9 +277,19 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid)
283277 */
284278SlruRecentlyUsed (shared ,slotno );
285279
286- /* Release shared lock, grab per-buffer lock instead */
287- LWLockRelease (shared -> ControlLock );
288- LWLockAcquire (shared -> buffer_locks [slotno ],LW_EXCLUSIVE );
280+ /*
281+ * We must grab the per-buffer lock to do I/O. To avoid deadlock,
282+ * must release ControlLock while waiting for per-buffer lock.
283+ * Fortunately, most of the time the per-buffer lock shouldn't be
284+ * already held, so we can do this:
285+ */
286+ if (!LWLockConditionalAcquire (shared -> buffer_locks [slotno ],
287+ LW_EXCLUSIVE ))
288+ {
289+ LWLockRelease (shared -> ControlLock );
290+ LWLockAcquire (shared -> buffer_locks [slotno ],LW_EXCLUSIVE );
291+ LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
292+ }
289293
290294/*
291295 * Check to see if someone else already did the read, or took the
@@ -295,11 +299,12 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid)
295299shared -> page_status [slotno ]!= SLRU_PAGE_READ_IN_PROGRESS )
296300{
297301LWLockRelease (shared -> buffer_locks [slotno ]);
298- LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
299302continue ;
300303}
301304
302- /* Okay, do the read */
305+ /* Okay, release control lock and do the read */
306+ LWLockRelease (shared -> ControlLock );
307+
303308ok = SlruPhysicalReadPage (ctl ,pageno ,slotno );
304309
305310/* Re-acquire shared control lock and update page state */
@@ -346,9 +351,19 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
346351
347352pageno = shared -> page_number [slotno ];
348353
349- /* Release shared lock, grab per-buffer lock instead */
350- LWLockRelease (shared -> ControlLock );
351- LWLockAcquire (shared -> buffer_locks [slotno ],LW_EXCLUSIVE );
354+ /*
355+ * We must grab the per-buffer lock to do I/O. To avoid deadlock,
356+ * must release ControlLock while waiting for per-buffer lock.
357+ * Fortunately, most of the time the per-buffer lock shouldn't be
358+ * already held, so we can do this:
359+ */
360+ if (!LWLockConditionalAcquire (shared -> buffer_locks [slotno ],
361+ LW_EXCLUSIVE ))
362+ {
363+ LWLockRelease (shared -> ControlLock );
364+ LWLockAcquire (shared -> buffer_locks [slotno ],LW_EXCLUSIVE );
365+ LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
366+ }
352367
353368/*
354369 * Check to see if someone else already did the write, or took the buffer
@@ -362,24 +377,18 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
362377shared -> page_status [slotno ]!= SLRU_PAGE_WRITE_IN_PROGRESS ))
363378{
364379LWLockRelease (shared -> buffer_locks [slotno ]);
365- LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
366380return ;
367381}
368382
369383/*
370384 * Mark the slot write-busy. After this point, a transaction status
371- * update on this page will mark it dirty again. NB: we are assuming that
372- * read/write of the page status field is atomic, since we change the
373- * state while not holding control lock. However, we cannot set this
374- * state any sooner, or we'd possibly fool a previous writer into thinking
375- * he's successfully dumped the page when he hasn't. (Scenario: other
376- * writer starts, page is redirtied, we come along and set
377- * WRITE_IN_PROGRESS again, other writer completes and sets CLEAN because
378- * redirty info has been lost, then we think it's clean too.)
385+ * update on this page will mark it dirty again.
379386 */
380387shared -> page_status [slotno ]= SLRU_PAGE_WRITE_IN_PROGRESS ;
381388
382- /* Okay, do the write */
389+ /* Okay, release the control lock and do the write */
390+ LWLockRelease (shared -> ControlLock );
391+
383392ok = SlruPhysicalWritePage (ctl ,pageno ,slotno ,fdata );
384393
385394/* If we failed, and we're in a flush, better close the files */
@@ -745,12 +754,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
745754/*
746755 * We need to do I/O. Normal case is that we have to write it out,
747756 * but it's possible in the worst case to have selected a read-busy
748- * page. In that case we use SimpleLruReadPage to wait for the read
749- * to complete.
757+ * page. In that case we just wait for someone else to complete
758+ * the I/O, which we can do by waiting for the per-buffer lock.
750759 */
751760if (shared -> page_status [bestslot ]== SLRU_PAGE_READ_IN_PROGRESS )
752- (void )SimpleLruReadPage (ctl ,shared -> page_number [bestslot ],
753- InvalidTransactionId );
761+ {
762+ LWLockRelease (shared -> ControlLock );
763+ LWLockAcquire (shared -> buffer_locks [bestslot ],LW_SHARED );
764+ LWLockRelease (shared -> buffer_locks [bestslot ]);
765+ LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
766+ }
754767else
755768SimpleLruWritePage (ctl ,bestslot ,NULL );
756769
@@ -885,8 +898,12 @@ restart:;
885898 * the same logic as in SlruSelectLRUPage.
886899 */
887900if (shared -> page_status [slotno ]== SLRU_PAGE_READ_IN_PROGRESS )
888- (void )SimpleLruReadPage (ctl ,shared -> page_number [slotno ],
889- InvalidTransactionId );
901+ {
902+ LWLockRelease (shared -> ControlLock );
903+ LWLockAcquire (shared -> buffer_locks [slotno ],LW_SHARED );
904+ LWLockRelease (shared -> buffer_locks [slotno ]);
905+ LWLockAcquire (shared -> ControlLock ,LW_EXCLUSIVE );
906+ }
890907else
891908SimpleLruWritePage (ctl ,slotno ,NULL );
892909gotorestart ;