77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.70 2000/01/15 02:59:33 petere Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.71 2000/01/17 01:15:17 inoue Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -74,6 +74,18 @@ static intWriteMode = BUFFER_LATE_WRITE;/* Delayed write is
7474
7575static void WaitIO (BufferDesc * buf ,SPINLOCK spinlock );
7676
77+ static void StartBufferIO (BufferDesc * buf ,bool forInput );
78+ static void TerminateBufferIO (BufferDesc * buf );
79+ static void ContinueBufferIO (BufferDesc * buf ,bool forInput );
80+ extern void InitBufferIO (void );
81+ extern void AbortBufferIO (void );
82+
83+ /*
84+ * Macro : BUFFER_IS_BROKEN - true when buf has BM_IO_ERROR set and BM_DIRTY clear
85+ * Note that a write error doesn't mean the buffer is broken
86+ */
87+ #define BUFFER_IS_BROKEN(buf) (((buf)->flags & BM_IO_ERROR) && !((buf)->flags & BM_DIRTY))
88+
7789#ifndef HAS_TEST_AND_SET
7890static void SignalIO (BufferDesc * buf );
7991extern long * NWaitIOBackendP ;/* defined in buf_init.c */
@@ -312,12 +324,7 @@ ReadBufferWithBufferLock(Relation reln,
312324}
313325
314326/* If anyone was waiting for IO to complete, wake them up now */
315- #ifdef HAS_TEST_AND_SET
316- S_UNLOCK (& (bufHdr -> io_in_progress_lock ));
317- #else
318- if (bufHdr -> refcount > 1 )
319- SignalIO (bufHdr );
320- #endif
327+ TerminateBufferIO (bufHdr );
321328
322329SpinRelease (BufMgrLock );
323330
@@ -375,13 +382,19 @@ BufferAlloc(Relation reln,
375382inProgress = (buf -> flags & BM_IO_IN_PROGRESS );
376383
377384* foundPtr = TRUE;
378- if (inProgress )
385+ if (inProgress )/* confirm end of IO */
379386{
380387WaitIO (buf ,BufMgrLock );
381- if (buf -> flags & BM_IO_ERROR )
382- {
388+ inProgress = (buf -> flags & BM_IO_IN_PROGRESS );
389+ }
390+ if (BUFFER_IS_BROKEN (buf ))
391+ {
392+ /* I couldn't understand the following old comment.
393+ * If there's no IO for the buffer and the buffer
394+ * is BROKEN, it should be read again. So start a
395+ * new buffer IO here.
383396
384- / *
397+ *
385398 * wierd race condition:
386399 *
387400 * We were waiting for someone else to read the buffer. While
@@ -396,13 +409,14 @@ BufferAlloc(Relation reln,
396409 *
397410 * This is never going to happen, don't worry about it.
398411 */
399- * foundPtr = FALSE;
400- }
412+ * foundPtr = FALSE;
401413}
402414#ifdef BMTRACE
403415_bm_trace ((reln -> rd_rel -> relisshared ?0 :MyDatabaseId ),RelationGetRelid (reln ),blockNum ,BufferDescriptorGetBuffer (buf ),BMT_ALLOCFND );
404416#endif /* BMTRACE */
405417
418+ if (!(* foundPtr ))
419+ StartBufferIO (buf , true);
406420SpinRelease (BufMgrLock );
407421
408422return buf ;
@@ -454,17 +468,15 @@ BufferAlloc(Relation reln,
454468 * in WaitIO until we're done.
455469 */
456470inProgress = TRUE;
457- buf -> flags |=BM_IO_IN_PROGRESS ;
458471#ifdef HAS_TEST_AND_SET
459472
460473/*
461474 * All code paths that acquire this lock pin the buffer first;
462475 * since no one had it pinned (it just came off the free
463476 * list), no one else can have this lock.
464477 */
465- Assert (S_LOCK_FREE (& (buf -> io_in_progress_lock )));
466- S_LOCK (& (buf -> io_in_progress_lock ));
467478#endif /* HAS_TEST_AND_SET */
479+ StartBufferIO (buf , false);
468480
469481/*
470482 * Write the buffer out, being careful to release BufMgrLock
@@ -486,12 +498,7 @@ BufferAlloc(Relation reln,
486498inProgress = FALSE;
487499buf -> flags |=BM_IO_ERROR ;
488500buf -> flags &= ~BM_IO_IN_PROGRESS ;
489- #ifdef HAS_TEST_AND_SET
490- S_UNLOCK (& (buf -> io_in_progress_lock ));
491- #else /* !HAS_TEST_AND_SET */
492- if (buf -> refcount > 1 )
493- SignalIO (buf );
494- #endif /* !HAS_TEST_AND_SET */
501+ TerminateBufferIO (buf );
495502PrivateRefCount [BufferDescriptorGetBuffer (buf )- 1 ]= 0 ;
496503Assert (buf -> refcount > 0 );
497504buf -> refcount -- ;
@@ -533,12 +540,7 @@ BufferAlloc(Relation reln,
533540{
534541inProgress = FALSE;
535542buf -> flags &= ~BM_IO_IN_PROGRESS ;
536- #ifdef HAS_TEST_AND_SET
537- S_UNLOCK (& (buf -> io_in_progress_lock ));
538- #else /* !HAS_TEST_AND_SET */
539- if (buf -> refcount > 1 )
540- SignalIO (buf );
541- #endif /* !HAS_TEST_AND_SET */
543+ TerminateBufferIO (buf );
542544PrivateRefCount [BufferDescriptorGetBuffer (buf )- 1 ]= 0 ;
543545buf -> refcount -- ;
544546buf = (BufferDesc * )NULL ;
@@ -563,12 +565,7 @@ BufferAlloc(Relation reln,
563565 */
564566if (buf != NULL )
565567{
566- #ifdef HAS_TEST_AND_SET
567- S_UNLOCK (& (buf -> io_in_progress_lock ));
568- #else /* !HAS_TEST_AND_SET */
569- if (buf -> refcount > 1 )
570- SignalIO (buf );
571- #endif /* !HAS_TEST_AND_SET */
568+ TerminateBufferIO (buf );
572569/* give up the buffer since we don't need it any more */
573570PrivateRefCount [BufferDescriptorGetBuffer (buf )- 1 ]= 0 ;
574571Assert (buf -> refcount > 0 );
@@ -588,10 +585,13 @@ BufferAlloc(Relation reln,
588585if (inProgress )
589586{
590587WaitIO (buf2 ,BufMgrLock );
591- if (buf2 -> flags & BM_IO_ERROR )
592- * foundPtr = FALSE;
588+ inProgress = (buf2 -> flags & BM_IO_IN_PROGRESS );
593589}
590+ if (BUFFER_IS_BROKEN (buf2 ))
591+ * foundPtr = FALSE;
594592
593+ if (!(* foundPtr ))
594+ StartBufferIO (buf2 , true);
595595SpinRelease (BufMgrLock );
596596
597597return buf2 ;
@@ -640,12 +640,10 @@ BufferAlloc(Relation reln,
640640 */
641641if (!inProgress )
642642{
643- buf -> flags |=BM_IO_IN_PROGRESS ;
644- #ifdef HAS_TEST_AND_SET
645- Assert (S_LOCK_FREE (& (buf -> io_in_progress_lock )));
646- S_LOCK (& (buf -> io_in_progress_lock ));
647- #endif /* HAS_TEST_AND_SET */
643+ StartBufferIO (buf , true);
648644}
645+ else
646+ ContinueBufferIO (buf , true);
649647
650648#ifdef BMTRACE
651649_bm_trace ((reln -> rd_rel -> relisshared ?0 :MyDatabaseId ),RelationGetRelid (reln ),blockNum ,BufferDescriptorGetBuffer (buf ),BMT_ALLOCNOTFND );
@@ -806,7 +804,9 @@ FlushBuffer(Buffer buffer, bool release)
806804
807805/* To check if block content changed while flushing. - vadim 01/17/97 */
808806SpinAcquire (BufMgrLock );
807+ WaitIO (bufHdr ,BufMgrLock );/* confirm end of IO */
809808bufHdr -> flags &= ~BM_JUST_DIRTIED ;
809+ StartBufferIO (bufHdr , false);/* output IO start */
810810SpinRelease (BufMgrLock );
811811
812812status = smgrflush (DEFAULT_SMGR ,bufrel ,bufHdr -> tag .blockNum ,
@@ -824,6 +824,8 @@ FlushBuffer(Buffer buffer, bool release)
824824BufferFlushCount ++ ;
825825
826826SpinAcquire (BufMgrLock );
827+ bufHdr -> flags &= ~BM_IO_IN_PROGRESS ;/* mark IO finished */
828+ TerminateBufferIO (bufHdr );/* output IO finished */
827829
828830/*
829831 * If this buffer was marked by someone as DIRTY while we were
@@ -998,7 +1000,9 @@ BufferSync()
9981000 * To check if block content changed while flushing (see
9991001 * below). - vadim 01/17/97
10001002 */
1003+ WaitIO (bufHdr ,BufMgrLock );/* confirm end of IO */
10011004bufHdr -> flags &= ~BM_JUST_DIRTIED ;
1005+ StartBufferIO (bufHdr , false);/* output IO start */
10021006
10031007/*
10041008 * If we didn't have the reldesc in our local cache, flush
@@ -1034,6 +1038,8 @@ BufferSync()
10341038elog (ERROR ,"BufferSync: cannot write %u for %s" ,
10351039bufHdr -> tag .blockNum ,bufHdr -> sb_relname );
10361040}
1041+ bufHdr -> flags &= ~BM_IO_IN_PROGRESS ;/* mark IO finished */
1042+ TerminateBufferIO (bufHdr );/* Sync IO finished */
10371043BufferFlushCount ++ ;
10381044
10391045/*
@@ -1084,10 +1090,16 @@ BufferSync()
10841090static void
10851091WaitIO (BufferDesc * buf ,SPINLOCK spinlock )
10861092{
1087- SpinRelease (spinlock );
1088- S_LOCK (& (buf -> io_in_progress_lock ));
1089- S_UNLOCK (& (buf -> io_in_progress_lock ));
1090- SpinAcquire (spinlock );
1093+ /*
1094+ * Wait until BM_IO_IN_PROGRESS is clear on buf; the flag is re-tested after every wakeup (changed to loop until there's no IO - Inoue 01/13/2000)
1095+ */
1096+ while ((buf -> flags & BM_IO_IN_PROGRESS )!= 0 )
1097+ {
1098+ SpinRelease (spinlock );/* give up the caller's spinlock while we block */
1099+ S_LOCK (& (buf -> io_in_progress_lock ));/* same lock StartBufferIO takes and TerminateBufferIO releases */
1100+ S_UNLOCK (& (buf -> io_in_progress_lock ));/* we only wanted to wait for it, not to keep it */
1101+ SpinAcquire (spinlock );/* re-take the spinlock before looking at buf->flags again */
1102+ }
10911103}
10921104
10931105#else /* !HAS_TEST_AND_SET */
@@ -2163,3 +2175,112 @@ LockBuffer(Buffer buffer, int mode)
21632175#endif
21642176
21652177}
2178+
2179+ /*
2180+ *Functions for IO error handling
2181+ *
2182+ *Note : We assume that nested buffer IO never occurs,
2183+ *i.e. at most one io_in_progress spinlock is held
2184+ *per process.
2185+ */
2186+ static BufferDesc * InProgressBuf = (BufferDesc * )NULL ;/* buffer this process has IO in progress on, or NULL */
2187+ static bool IsForInput ;/* forInput value last passed to StartBufferIO/ContinueBufferIO */
2188+
2189+ /*
2190+ * Function:StartBufferIO - flag buf as IO-in-progress and remember it as this process's current buffer IO
2191+ *(Assumptions)
2192+ *My process is executing no IO
2193+ *BufMgrLock is held
2194+ *BM_IO_IN_PROGRESS mask is not set for the buffer
2195+ *The buffer is Pinned
2196+ *forInput is saved in IsForInput; AbortBufferIO reads it to tell input from output
2197+ */
2198+ static void StartBufferIO (BufferDesc * buf ,bool forInput )
2199+ {
2200+ Assert (!InProgressBuf );/* nested buffer IO is not supported */
2201+ Assert (!(buf -> flags & BM_IO_IN_PROGRESS ));
2202+ buf -> flags |=BM_IO_IN_PROGRESS ;
2203+ #ifdef HAS_TEST_AND_SET
2204+ Assert (S_LOCK_FREE (& (buf -> io_in_progress_lock )));
2205+ S_LOCK (& (buf -> io_in_progress_lock ));/* held until TerminateBufferIO; WaitIO blocks on it */
2206+ #endif /* HAS_TEST_AND_SET */
2207+ InProgressBuf = buf ;
2208+ IsForInput = forInput ;
2209+ }
2210+
2211+ /*
2212+ * Function:TerminateBufferIO - release/signal waiters for buf's IO and forget it as this process's current buffer IO
2213+ *(Assumptions)
2214+ *My process is executing IO for the buffer
2215+ *BufMgrLock is held
2216+ *The buffer is Pinned
2217+ *buf->flags is not touched here; the callers visible in this file clear BM_IO_IN_PROGRESS themselves
2218+ */
2219+ static void TerminateBufferIO (BufferDesc * buf )
2220+ {
2221+ Assert (buf == InProgressBuf );
2222+ #ifdef HAS_TEST_AND_SET
2223+ S_UNLOCK (& (buf -> io_in_progress_lock ));/* lets processes blocked in WaitIO proceed */
2224+ #else
2225+ if (buf -> refcount > 1 )/* presumably someone besides us has the buffer pinned and may be waiting - confirm */
2226+ SignalIO (buf );
2227+ #endif /* HAS_TEST_AND_SET */
2228+ InProgressBuf = (BufferDesc * )0 ;/* this process no longer has buffer IO in progress */
2229+ }
2230+
2231+ /*
2232+ * Function:ContinueBufferIO - change the recorded direction (IsForInput) of an IO already started on buf
2233+ *(Assumptions)
2234+ *My process is executing IO for the buffer
2235+ *BufMgrLock is held
2236+ *The buffer is Pinned
2237+ *Neither buf->flags nor the io_in_progress lock is modified here
2238+ */
2239+ static void ContinueBufferIO (BufferDesc * buf ,bool forInput )
2240+ {
2241+ Assert (buf == InProgressBuf );
2242+ Assert (buf -> flags & BM_IO_IN_PROGRESS );
2243+ IsForInput = forInput ;
2244+ }
2245+
2246+ extern void InitBufferIO (void )
2247+ {
2248+ InProgressBuf = (BufferDesc * )0 ;/* start with no buffer IO recorded for this process */
2249+ }
2250+
2251+ /*
2252+ *AbortBufferIO: clean up buffer IO that this process left in progress (InProgressBuf), if any.
2253+ *This function is called from ProcReleaseSpins(); BufMgrLock isn't held on entry and is taken here.
2254+ *BM_IO_ERROR is always set on the buffer. For output, BM_DIRTY is set as well, and if
2255+ *BM_IO_ERROR was already set the write error is treated as permanent: two NOTICEs are
2256+ *logged and proc_exit(255) is called (per the message: kill all backends and reset postmaster).
2257+ */
2258+ extern void AbortBufferIO (void )
2259+ {
2260+ BufferDesc * buf = InProgressBuf ;
2261+ if (buf )
2262+ {
2263+ Assert (buf -> flags & BM_IO_IN_PROGRESS );
2264+ SpinAcquire (BufMgrLock );
2265+ if (IsForInput )
2266+ {
2267+ Assert (!(buf -> flags & BM_DIRTY ));
2268+ }
2269+ else
2270+ {
2271+ Assert (!(buf -> flags & BM_DIRTY ));/* NOTE(review): the visible output callers clear only BM_JUST_DIRTIED before StartBufferIO(buf, false) - confirm BM_DIRTY is really clear here */
2272+ /* Assert(!(buf->flags & BM_IO_ERROR)); */
2273+ if (buf -> flags & BM_IO_ERROR )/* an earlier IO on this buffer had already failed */
2274+ {
2275+ elog (NOTICE ,"!!! write error seems permanent !!!" );
2276+ elog (NOTICE ,"!!! now kill all backends and reset postmaster !!!" );
2277+ proc_exit (255 );
2278+ }
2279+ buf -> flags |=BM_DIRTY ;/* failed write: mark the buffer dirty again */
2280+ }
2281+ buf -> flags |=BM_IO_ERROR ;
2282+ TerminateBufferIO (buf );/* releases/signals waiters and clears InProgressBuf */
2283+ buf -> flags &= ~BM_IO_IN_PROGRESS ;/* still under BufMgrLock */
2284+ SpinRelease (BufMgrLock );
2285+ }
2286+ }
2286+ }