1313 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
1414 * Portions Copyright (c) 1994, Regents of the University of California
1515 *
16- * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.11 2002/09/26 22:58 :33 tgl Exp $
16+ * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.12 2003/04/14 17:31 :33 tgl Exp $
1717 *
1818 *-------------------------------------------------------------------------
1919 */
@@ -123,7 +123,7 @@ typedef enum
123123CLOG_PAGE_READ_IN_PROGRESS ,/* CLOG page is being read in */
124124CLOG_PAGE_CLEAN ,/* CLOG page is valid and not dirty */
125125CLOG_PAGE_DIRTY ,/* CLOG page is valid but needs write */
126- CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written outin */
126+ CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written out */
127127}ClogPageStatus ;
128128
129129/*
@@ -180,12 +180,25 @@ static char ClogDir[MAXPGPATH];
180180ClogCtl->page_lru_count[slotno] = 0; \
181181} while (0)
182182
183+ /* Saved info for CLOGReportIOError: recorded by CLOGPhysicalReadPage / CLOGPhysicalWritePage just before they return false, so the error can be reported later */
184+ typedef enum
185+ {
186+ CLOG_OPEN_FAILED ,/* opening the segment file failed */
187+ CLOG_CREATE_FAILED ,/* creating a not-yet-existing segment file failed (set only on the write path) */
188+ CLOG_SEEK_FAILED ,/* lseek to the page's offset failed */
189+ CLOG_READ_FAILED ,/* read() did not return CLOG_BLCKSZ bytes */
190+ CLOG_WRITE_FAILED /* write() did not return CLOG_BLCKSZ bytes */
191+ }ClogErrorCause ;
192+ static ClogErrorCause clog_errcause ;/* which step of the last failed physical I/O went wrong */
193+ static int clog_errno ;/* errno value captured at the point of that failure */
194+
183195
184196static int ZeroCLOGPage (int pageno ,bool writeXlog );
185- static int ReadCLOGPage (int pageno );
197+ static int ReadCLOGPage (int pageno , TransactionId xid );
186198static void WriteCLOGPage (int slotno );
187- static void CLOGPhysicalReadPage (int pageno ,int slotno );
188- static void CLOGPhysicalWritePage (int pageno ,int slotno );
199+ static bool CLOGPhysicalReadPage (int pageno ,int slotno );
200+ static bool CLOGPhysicalWritePage (int pageno ,int slotno );
201+ static void CLOGReportIOError (int pageno ,TransactionId xid );
189202static int SelectLRUCLOGPage (int pageno );
190203static bool ScanCLOGDirectory (int cutoffPage ,bool doDeletions );
191204static bool CLOGPagePrecedes (int page1 ,int page2 );
@@ -212,7 +225,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
212225
213226LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
214227
215- slotno = ReadCLOGPage (pageno );
228+ slotno = ReadCLOGPage (pageno , xid );
216229byteptr = ClogCtl -> page_buffer [slotno ]+ byteno ;
217230
218231/* Current state should be 0 or target state */
@@ -244,7 +257,7 @@ TransactionIdGetStatus(TransactionId xid)
244257
245258LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
246259
247- slotno = ReadCLOGPage (pageno );
260+ slotno = ReadCLOGPage (pageno , xid );
248261byteptr = ClogCtl -> page_buffer [slotno ]+ byteno ;
249262
250263status = (* byteptr >>bshift )& CLOG_XACT_BITMASK ;
@@ -362,18 +375,22 @@ ZeroCLOGPage(int pageno, bool writeXlog)
362375 * Find a CLOG page in a shared buffer, reading it in if necessary.
363376 * The page number must correspond to an already-initialized page.
364377 *
378+ * The passed-in xid is used only for error reporting, and may be
379+ * InvalidTransactionId if no specific xid is associated with the action.
380+ *
365381 * Return value is the shared-buffer slot number now holding the page.
366382 * The buffer's LRU access info is updated.
367383 *
368384 * Control lock must be held at entry, and will be held at exit.
369385 */
370386static int
371- ReadCLOGPage (int pageno )
387+ ReadCLOGPage (int pageno , TransactionId xid )
372388{
373389/* Outer loop handles restart if we lose the buffer to someone else */
374390for (;;)
375391{
376392int slotno ;
393+ bool ok ;
377394
378395/* See if page already is in memory; if not, pick victim slot */
379396slotno = SelectLRUCLOGPage (pageno );
@@ -424,18 +441,22 @@ ReadCLOGPage(int pageno)
424441}
425442
426443/* Okay, do the read */
427- CLOGPhysicalReadPage (pageno ,slotno );
444+ ok = CLOGPhysicalReadPage (pageno ,slotno );
428445
429446/* Re-acquire shared control lock and update page state */
430447LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
431448
432449Assert (ClogCtl -> page_number [slotno ]== pageno &&
433450ClogCtl -> page_status [slotno ]== CLOG_PAGE_READ_IN_PROGRESS );
434451
435- ClogCtl -> page_status [slotno ]= CLOG_PAGE_CLEAN ;
452+ ClogCtl -> page_status [slotno ]= ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_EMPTY ;
436453
437454LWLockRelease (ClogBufferLocks [slotno ]);
438455
456+ /* Now it's okay to elog if we failed */
457+ if (!ok )
458+ CLOGReportIOError (pageno ,xid );
459+
439460ClogRecentlyUsed (slotno );
440461return slotno ;
441462}
@@ -456,6 +477,7 @@ static void
456477WriteCLOGPage (int slotno )
457478{
458479int pageno ;
480+ bool ok ;
459481
460482/* Do nothing if page does not need writing */
461483if (ClogCtl -> page_status [slotno ]!= CLOG_PAGE_DIRTY &&
@@ -499,7 +521,7 @@ WriteCLOGPage(int slotno)
499521ClogCtl -> page_status [slotno ]= CLOG_PAGE_WRITE_IN_PROGRESS ;
500522
501523/* Okay, do the write */
502- CLOGPhysicalWritePage (pageno ,slotno );
524+ ok = CLOGPhysicalWritePage (pageno ,slotno );
503525
504526/* Re-acquire shared control lock and update page state */
505527LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
@@ -510,18 +532,26 @@ WriteCLOGPage(int slotno)
510532
511533/* Cannot set CLEAN if someone re-dirtied page since write started */
512534if (ClogCtl -> page_status [slotno ]== CLOG_PAGE_WRITE_IN_PROGRESS )
513- ClogCtl -> page_status [slotno ]= CLOG_PAGE_CLEAN ;
535+ ClogCtl -> page_status [slotno ]= ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_DIRTY ;
514536
515537LWLockRelease (ClogBufferLocks [slotno ]);
538+
539+ /* Now it's okay to elog if we failed */
540+ if (!ok )
541+ CLOGReportIOError (pageno ,InvalidTransactionId );
516542}
517543
518544/*
519545 * Physical read of a (previously existing) page into a buffer slot
520546 *
547+ * On failure, we cannot just elog(ERROR) since caller has put state in
548+ * shared memory that must be undone. So, we return FALSE and save enough
549+ * info in static variables to let CLOGReportIOError make the report.
550+ *
521551 * For now, assume it's not worth keeping a file pointer open across
522552 * read/write operations. We could cache one virtual file pointer ...
523553 */
524- static void
554+ static bool
525555CLOGPhysicalReadPage (int pageno ,int slotno )
526556{
527557int segno = pageno /CLOG_PAGES_PER_SEGMENT ;
@@ -543,31 +573,47 @@ CLOGPhysicalReadPage(int pageno, int slotno)
543573if (fd < 0 )
544574{
545575if (errno != ENOENT || !InRecovery )
546- elog (PANIC ,"open of %s failed: %m" ,path );
576+ {
577+ clog_errcause = CLOG_OPEN_FAILED ;
578+ clog_errno = errno ;
579+ return false;
580+ }
581+
547582elog (LOG ,"clog file %s doesn't exist, reading as zeroes" ,path );
548583MemSet (ClogCtl -> page_buffer [slotno ],0 ,CLOG_BLCKSZ );
549- return ;
584+ return true ;
550585}
551586
552587if (lseek (fd , (off_t )offset ,SEEK_SET )< 0 )
553- elog (PANIC ,"lseek of clog file %u, offset %u failed: %m" ,
554- segno ,offset );
588+ {
589+ clog_errcause = CLOG_SEEK_FAILED ;
590+ clog_errno = errno ;
591+ return false;
592+ }
555593
556594errno = 0 ;
557595if (read (fd ,ClogCtl -> page_buffer [slotno ],CLOG_BLCKSZ )!= CLOG_BLCKSZ )
558- elog (PANIC ,"read of clog file %u, offset %u failed: %m" ,
559- segno ,offset );
596+ {
597+ clog_errcause = CLOG_READ_FAILED ;
598+ clog_errno = errno ;
599+ return false;
600+ }
560601
561602close (fd );
603+ return true;
562604}
563605
564606/*
565607 * Physical write of a page from a buffer slot
566608 *
609+ * On failure, we cannot just elog(ERROR) since caller has put state in
610+ * shared memory that must be undone. So, we return FALSE and save enough
611+ * info in static variables to let CLOGReportIOError make the report.
612+ *
567613 * For now, assume it's not worth keeping a file pointer open across
568614 * read/write operations. We could cache one virtual file pointer ...
569615 */
570- static void
616+ static bool
571617CLOGPhysicalWritePage (int pageno ,int slotno )
572618{
573619int segno = pageno /CLOG_PAGES_PER_SEGMENT ;
@@ -595,28 +641,85 @@ CLOGPhysicalWritePage(int pageno, int slotno)
595641if (fd < 0 )
596642{
597643if (errno != ENOENT )
598- elog (PANIC ,"open of %s failed: %m" ,path );
644+ {
645+ clog_errcause = CLOG_OPEN_FAILED ;
646+ clog_errno = errno ;
647+ return false;
648+ }
649+
599650fd = BasicOpenFile (path ,O_RDWR |O_CREAT |O_EXCL |PG_BINARY ,
600651S_IRUSR |S_IWUSR );
601652if (fd < 0 )
602- elog (PANIC ,"creation of file %s failed: %m" ,path );
653+ {
654+ clog_errcause = CLOG_CREATE_FAILED ;
655+ clog_errno = errno ;
656+ return false;
657+ }
603658}
604659
605660if (lseek (fd , (off_t )offset ,SEEK_SET )< 0 )
606- elog (PANIC ,"lseek of clog file %u, offset %u failed: %m" ,
607- segno ,offset );
661+ {
662+ clog_errcause = CLOG_SEEK_FAILED ;
663+ clog_errno = errno ;
664+ return false;
665+ }
608666
609667errno = 0 ;
610668if (write (fd ,ClogCtl -> page_buffer [slotno ],CLOG_BLCKSZ )!= CLOG_BLCKSZ )
611669{
612670/* if write didn't set errno, assume problem is no disk space */
613671if (errno == 0 )
614672errno = ENOSPC ;
615- elog (PANIC ,"write of clog file %u, offset %u failed: %m" ,
616- segno ,offset );
673+ clog_errcause = CLOG_WRITE_FAILED ;
674+ clog_errno = errno ;
675+ return false;
617676}
618677
619678close (fd );
679+ return true;
680+ }
681+
682+ /*
683+ * Issue the error message after failure of CLOGPhysicalReadPage or
684+ * CLOGPhysicalWritePage. Call this after cleaning up shared-memory state. The failure cause and errno are taken from the static variables clog_errcause and clog_errno; the message is raised with elog(ERROR) for page "pageno". "xid" is accepted but not used yet.
685+ */
686+ static void
687+ CLOGReportIOError (int pageno ,TransactionId xid )
688+ {
689+ int segno = pageno /CLOG_PAGES_PER_SEGMENT ;/* segment number the page belongs to */
690+ int rpageno = pageno %CLOG_PAGES_PER_SEGMENT ;/* page index within that segment */
691+ int offset = rpageno * CLOG_BLCKSZ ;/* byte offset of the page within the segment */
692+ char path [MAXPGPATH ];/* receives the segment file name from ClogFileName below */
693+
694+ /* XXX TODO: provide xid as context in error messages (until then the xid parameter is unused) */
695+
696+ ClogFileName (path ,segno );/* done before errno is restored on the next line; NOTE(review): presumably because the macro could disturb errno -- confirm */
697+ errno = clog_errno ;/* put back the errno saved at failure time so that %m below describes the original error */
698+ switch (clog_errcause )
699+ {
700+ case CLOG_OPEN_FAILED :
701+ elog (ERROR ,"open of %s failed: %m" ,path );
702+ break ;
703+ case CLOG_CREATE_FAILED :
704+ elog (ERROR ,"creation of file %s failed: %m" ,path );
705+ break ;
706+ case CLOG_SEEK_FAILED :
707+ elog (ERROR ,"lseek of clog file %u, offset %u failed: %m" ,/* NOTE(review): segno and offset are int but printed with %u -- confirm this is intended */
708+ segno ,offset );
709+ break ;
710+ case CLOG_READ_FAILED :
711+ elog (ERROR ,"read of clog file %u, offset %u failed: %m" ,
712+ segno ,offset );
713+ break ;
714+ case CLOG_WRITE_FAILED :
715+ elog (ERROR ,"write of clog file %u, offset %u failed: %m" ,
716+ segno ,offset );
717+ break ;
718+ default :
719+ /* can't get here, we trust */
720+ elog (ERROR ,"unknown CLOG I/O error" );
721+ break ;
722+ }
723+ }
621724
622725/*
@@ -679,7 +782,8 @@ SelectLRUCLOGPage(int pageno)
679782 * the read to complete.
680783 */
681784if (ClogCtl -> page_status [bestslot ]== CLOG_PAGE_READ_IN_PROGRESS )
682- (void )ReadCLOGPage (ClogCtl -> page_number [bestslot ]);
785+ (void )ReadCLOGPage (ClogCtl -> page_number [bestslot ],
786+ InvalidTransactionId );
683787else
684788WriteCLOGPage (bestslot );
685789
@@ -857,7 +961,8 @@ restart:;
857961 * This is the same logic as in SelectLRUCLOGPage.
858962 */
859963if (ClogCtl -> page_status [slotno ]== CLOG_PAGE_READ_IN_PROGRESS )
860- (void )ReadCLOGPage (ClogCtl -> page_number [slotno ]);
964+ (void )ReadCLOGPage (ClogCtl -> page_number [slotno ],
965+ InvalidTransactionId );
861966else
862967WriteCLOGPage (slotno );
863968gotorestart ;
@@ -886,7 +991,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
886991
887992cldir = opendir (ClogDir );
888993if (cldir == NULL )
889- elog (PANIC ,"could not open transaction-commit log directory (%s): %m" ,
994+ elog (ERROR ,"could not open transaction-commit log directory (%s): %m" ,
890995ClogDir );
891996
892997errno = 0 ;
@@ -911,7 +1016,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
9111016errno = 0 ;
9121017}
9131018if (errno )
914- elog (PANIC ,"could not read transaction-commit log directory (%s): %m" ,
1019+ elog (ERROR ,"could not read transaction-commit log directory (%s): %m" ,
9151020ClogDir );
9161021closedir (cldir );
9171022