13
13
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
14
14
* Portions Copyright (c) 1994, Regents of the University of California
15
15
*
16
- * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.11 2002/09/26 22:58 :33 tgl Exp $
16
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.12 2003/04/14 17:31 :33 tgl Exp $
17
17
*
18
18
*-------------------------------------------------------------------------
19
19
*/
@@ -123,7 +123,7 @@ typedef enum
123
123
CLOG_PAGE_READ_IN_PROGRESS ,/* CLOG page is being read in */
124
124
CLOG_PAGE_CLEAN ,/* CLOG page is valid and not dirty */
125
125
CLOG_PAGE_DIRTY ,/* CLOG page is valid but needs write */
126
- CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written outin */
126
+ CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written out */
127
127
}ClogPageStatus ;
128
128
129
129
/*
@@ -180,12 +180,25 @@ static char ClogDir[MAXPGPATH];
180
180
ClogCtl->page_lru_count[slotno] = 0; \
181
181
} while (0)
182
182
183
+ /* Saved info for CLOGReportIOError: which I/O step failed and the errno it left behind */
184
+ typedef enum
185
+ {
186
+ CLOG_OPEN_FAILED , /* opening the segment file failed */
187
+ CLOG_CREATE_FAILED , /* creating a missing segment file (write path) failed */
188
+ CLOG_SEEK_FAILED , /* lseek to the page offset failed */
189
+ CLOG_READ_FAILED , /* read did not return a full CLOG_BLCKSZ page */
190
+ CLOG_WRITE_FAILED /* write did not put out a full CLOG_BLCKSZ page */
191
+ }ClogErrorCause ;
192
+ static ClogErrorCause clog_errcause ; /* set by CLOGPhysicalReadPage and CLOGPhysicalWritePage just before they return false */
193
+ static int clog_errno ; /* errno captured at the point of failure; copied back into errno by CLOGReportIOError */
194
+
183
195
184
196
static int ZeroCLOGPage (int pageno ,bool writeXlog );
185
- static int ReadCLOGPage (int pageno );
197
+ static int ReadCLOGPage (int pageno , TransactionId xid ); /* xid is used only for error reporting; returns the buffer slot number */
186
198
static void WriteCLOGPage (int slotno );
187
- static void CLOGPhysicalReadPage (int pageno ,int slotno );
188
- static void CLOGPhysicalWritePage (int pageno ,int slotno );
199
+ static bool CLOGPhysicalReadPage (int pageno ,int slotno ); /* returns false on I/O failure, after saving the cause and errno */
200
+ static bool CLOGPhysicalWritePage (int pageno ,int slotno ); /* returns false on I/O failure, after saving the cause and errno */
201
+ static void CLOGReportIOError (int pageno ,TransactionId xid ); /* issues the elog for a failure saved by the two routines above */
189
202
static int SelectLRUCLOGPage (int pageno );
190
203
static bool ScanCLOGDirectory (int cutoffPage ,bool doDeletions );
191
204
static bool CLOGPagePrecedes (int page1 ,int page2 );
@@ -212,7 +225,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
212
225
213
226
LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
214
227
215
- slotno = ReadCLOGPage (pageno );
228
+ slotno = ReadCLOGPage (pageno , xid );
216
229
byteptr = ClogCtl -> page_buffer [slotno ]+ byteno ;
217
230
218
231
/* Current state should be 0 or target state */
@@ -244,7 +257,7 @@ TransactionIdGetStatus(TransactionId xid)
244
257
245
258
LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
246
259
247
- slotno = ReadCLOGPage (pageno );
260
+ slotno = ReadCLOGPage (pageno , xid );
248
261
byteptr = ClogCtl -> page_buffer [slotno ]+ byteno ;
249
262
250
263
status = (* byteptr >>bshift )& CLOG_XACT_BITMASK ;
@@ -362,18 +375,22 @@ ZeroCLOGPage(int pageno, bool writeXlog)
362
375
* Find a CLOG page in a shared buffer, reading it in if necessary.
363
376
* The page number must correspond to an already-initialized page.
364
377
*
378
+ * The passed-in xid is used only for error reporting, and may be
379
+ * InvalidTransactionId if no specific xid is associated with the action.
380
+ *
365
381
* Return value is the shared-buffer slot number now holding the page.
366
382
* The buffer's LRU access info is updated.
367
383
*
368
384
* Control lock must be held at entry, and will be held at exit.
369
385
*/
370
386
static int
371
- ReadCLOGPage (int pageno )
387
+ ReadCLOGPage (int pageno , TransactionId xid )
372
388
{
373
389
/* Outer loop handles restart if we lose the buffer to someone else */
374
390
for (;;)
375
391
{
376
392
int slotno ;
393
+ bool ok ;
377
394
378
395
/* See if page already is in memory; if not, pick victim slot */
379
396
slotno = SelectLRUCLOGPage (pageno );
@@ -424,18 +441,22 @@ ReadCLOGPage(int pageno)
424
441
}
425
442
426
443
/* Okay, do the read */
427
- CLOGPhysicalReadPage (pageno ,slotno );
444
+ ok = CLOGPhysicalReadPage (pageno ,slotno );
428
445
429
446
/* Re-acquire shared control lock and update page state */
430
447
LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
431
448
432
449
Assert (ClogCtl -> page_number [slotno ]== pageno &&
433
450
ClogCtl -> page_status [slotno ]== CLOG_PAGE_READ_IN_PROGRESS );
434
451
435
- ClogCtl -> page_status [slotno ]= CLOG_PAGE_CLEAN ;
452
+ ClogCtl -> page_status [slotno ]= ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_EMPTY ;
436
453
437
454
LWLockRelease (ClogBufferLocks [slotno ]);
438
455
456
+ /* Now it's okay to elog if we failed */
457
+ if (!ok )
458
+ CLOGReportIOError (pageno ,xid );
459
+
439
460
ClogRecentlyUsed (slotno );
440
461
return slotno ;
441
462
}
@@ -456,6 +477,7 @@ static void
456
477
WriteCLOGPage (int slotno )
457
478
{
458
479
int pageno ;
480
+ bool ok ;
459
481
460
482
/* Do nothing if page does not need writing */
461
483
if (ClogCtl -> page_status [slotno ]!= CLOG_PAGE_DIRTY &&
@@ -499,7 +521,7 @@ WriteCLOGPage(int slotno)
499
521
ClogCtl -> page_status [slotno ]= CLOG_PAGE_WRITE_IN_PROGRESS ;
500
522
501
523
/* Okay, do the write */
502
- CLOGPhysicalWritePage (pageno ,slotno );
524
+ ok = CLOGPhysicalWritePage (pageno ,slotno );
503
525
504
526
/* Re-acquire shared control lock and update page state */
505
527
LWLockAcquire (CLogControlLock ,LW_EXCLUSIVE );
@@ -510,18 +532,26 @@ WriteCLOGPage(int slotno)
510
532
511
533
/* Cannot set CLEAN if someone re-dirtied page since write started */
512
534
if (ClogCtl -> page_status [slotno ]== CLOG_PAGE_WRITE_IN_PROGRESS )
513
- ClogCtl -> page_status [slotno ]= CLOG_PAGE_CLEAN ;
535
+ ClogCtl -> page_status [slotno ]= ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_DIRTY ;
514
536
515
537
LWLockRelease (ClogBufferLocks [slotno ]);
538
+
539
+ /* Now it's okay to elog if we failed */
540
+ if (!ok )
541
+ CLOGReportIOError (pageno ,InvalidTransactionId );
516
542
}
517
543
518
544
/*
519
545
* Physical read of a (previously existing) page into a buffer slot
520
546
*
547
+ * On failure, we cannot just elog(ERROR) since caller has put state in
548
+ * shared memory that must be undone. So, we return FALSE and save enough
549
+ * info in static variables to let CLOGReportIOError make the report.
550
+ *
521
551
* For now, assume it's not worth keeping a file pointer open across
522
552
* read/write operations. We could cache one virtual file pointer ...
523
553
*/
524
- static void
554
+ static bool
525
555
CLOGPhysicalReadPage (int pageno ,int slotno )
526
556
{
527
557
int segno = pageno /CLOG_PAGES_PER_SEGMENT ;
@@ -543,31 +573,47 @@ CLOGPhysicalReadPage(int pageno, int slotno)
543
573
if (fd < 0 )
544
574
{
545
575
if (errno != ENOENT || !InRecovery )
546
- elog (PANIC ,"open of %s failed: %m" ,path );
576
+ {
577
+ clog_errcause = CLOG_OPEN_FAILED ;
578
+ clog_errno = errno ;
579
+ return false;
580
+ }
581
+
547
582
elog (LOG ,"clog file %s doesn't exist, reading as zeroes" ,path );
548
583
MemSet (ClogCtl -> page_buffer [slotno ],0 ,CLOG_BLCKSZ );
549
- return ;
584
+ return true ;
550
585
}
551
586
552
587
if (lseek (fd , (off_t )offset ,SEEK_SET )< 0 )
553
- elog (PANIC ,"lseek of clog file %u, offset %u failed: %m" ,
554
- segno ,offset );
588
+ {
589
+ clog_errcause = CLOG_SEEK_FAILED ;
590
+ clog_errno = errno ;
591
+ return false;
592
+ }
555
593
556
594
errno = 0 ;
557
595
if (read (fd ,ClogCtl -> page_buffer [slotno ],CLOG_BLCKSZ )!= CLOG_BLCKSZ )
558
- elog (PANIC ,"read of clog file %u, offset %u failed: %m" ,
559
- segno ,offset );
596
+ {
597
+ clog_errcause = CLOG_READ_FAILED ;
598
+ clog_errno = errno ;
599
+ return false;
600
+ }
560
601
561
602
close (fd );
603
+ return true;
562
604
}
563
605
564
606
/*
565
607
* Physical write of a page from a buffer slot
566
608
*
609
+ * On failure, we cannot just elog(ERROR) since caller has put state in
610
+ * shared memory that must be undone. So, we return FALSE and save enough
611
+ * info in static variables to let CLOGReportIOError make the report.
612
+ *
567
613
* For now, assume it's not worth keeping a file pointer open across
568
614
* read/write operations. We could cache one virtual file pointer ...
569
615
*/
570
- static void
616
+ static bool
571
617
CLOGPhysicalWritePage (int pageno ,int slotno )
572
618
{
573
619
int segno = pageno /CLOG_PAGES_PER_SEGMENT ;
@@ -595,28 +641,85 @@ CLOGPhysicalWritePage(int pageno, int slotno)
595
641
if (fd < 0 )
596
642
{
597
643
if (errno != ENOENT )
598
- elog (PANIC ,"open of %s failed: %m" ,path );
644
+ {
645
+ clog_errcause = CLOG_OPEN_FAILED ;
646
+ clog_errno = errno ;
647
+ return false;
648
+ }
649
+
599
650
fd = BasicOpenFile (path ,O_RDWR |O_CREAT |O_EXCL |PG_BINARY ,
600
651
S_IRUSR |S_IWUSR );
601
652
if (fd < 0 )
602
- elog (PANIC ,"creation of file %s failed: %m" ,path );
653
+ {
654
+ clog_errcause = CLOG_CREATE_FAILED ;
655
+ clog_errno = errno ;
656
+ return false;
657
+ }
603
658
}
604
659
605
660
if (lseek (fd , (off_t )offset ,SEEK_SET )< 0 )
606
- elog (PANIC ,"lseek of clog file %u, offset %u failed: %m" ,
607
- segno ,offset );
661
+ {
662
+ clog_errcause = CLOG_SEEK_FAILED ;
663
+ clog_errno = errno ;
664
+ return false;
665
+ }
608
666
609
667
errno = 0 ;
610
668
if (write (fd ,ClogCtl -> page_buffer [slotno ],CLOG_BLCKSZ )!= CLOG_BLCKSZ )
611
669
{
612
670
/* if write didn't set errno, assume problem is no disk space */
613
671
if (errno == 0 )
614
672
errno = ENOSPC ;
615
- elog (PANIC ,"write of clog file %u, offset %u failed: %m" ,
616
- segno ,offset );
673
+ clog_errcause = CLOG_WRITE_FAILED ;
674
+ clog_errno = errno ;
675
+ return false;
617
676
}
618
677
619
678
close (fd );
679
+ return true;
680
+ }
681
+
682
+ /*
683
+ * Issue the error message after failure of CLOGPhysicalReadPage or
684
+ * CLOGPhysicalWritePage. Call this after cleaning up shared-memory state; the message is chosen from the saved clog_errcause, and errno is reloaded from clog_errno.
685
+ */
686
+ static void
687
+ CLOGReportIOError (int pageno ,TransactionId xid ) /* pageno: page whose I/O failed; xid: not referenced in the body yet (see TODO below) */
688
+ {
689
+ int segno = pageno /CLOG_PAGES_PER_SEGMENT ; /* segment number that holds the page */
690
+ int rpageno = pageno %CLOG_PAGES_PER_SEGMENT ; /* index of the page within that segment */
691
+ int offset = rpageno * CLOG_BLCKSZ ; /* byte offset of the page within the segment */
692
+ char path [MAXPGPATH ];
693
+
694
+ /* XXX TODO: provide xid as context in error messages */
695
+
696
+ ClogFileName (path ,segno ); /* presumably fills path with the segment file name; path is used in the open and create messages */
697
+ errno = clog_errno ; /* reload the saved errno so the %m in the messages below describes the original failure */
698
+ switch (clog_errcause )
699
+ {
700
+ case CLOG_OPEN_FAILED :
701
+ elog (ERROR ,"open of %s failed: %m" ,path );
702
+ break ;
703
+ case CLOG_CREATE_FAILED :
704
+ elog (ERROR ,"creation of file %s failed: %m" ,path );
705
+ break ;
706
+ case CLOG_SEEK_FAILED :
707
+ elog (ERROR ,"lseek of clog file %u, offset %u failed: %m" ,
708
+ segno ,offset ); /* NOTE(review): segno and offset are declared int but printed with %u */
709
+ break ;
710
+ case CLOG_READ_FAILED :
711
+ elog (ERROR ,"read of clog file %u, offset %u failed: %m" ,
712
+ segno ,offset );
713
+ break ;
714
+ case CLOG_WRITE_FAILED :
715
+ elog (ERROR ,"write of clog file %u, offset %u failed: %m" ,
716
+ segno ,offset );
717
+ break ;
718
+ default :
719
+ /* can't get here, we trust */
720
+ elog (ERROR ,"unknown CLOG I/O error" );
721
+ break ;
722
+ }
620
723
}
621
724
622
725
/*
@@ -679,7 +782,8 @@ SelectLRUCLOGPage(int pageno)
679
782
* the read to complete.
680
783
*/
681
784
if (ClogCtl -> page_status [bestslot ]== CLOG_PAGE_READ_IN_PROGRESS )
682
- (void )ReadCLOGPage (ClogCtl -> page_number [bestslot ]);
785
+ (void )ReadCLOGPage (ClogCtl -> page_number [bestslot ],
786
+ InvalidTransactionId );
683
787
else
684
788
WriteCLOGPage (bestslot );
685
789
@@ -857,7 +961,8 @@ restart:;
857
961
* This is the same logic as in SelectLRUCLOGPage.
858
962
*/
859
963
if (ClogCtl -> page_status [slotno ]== CLOG_PAGE_READ_IN_PROGRESS )
860
- (void )ReadCLOGPage (ClogCtl -> page_number [slotno ]);
964
+ (void )ReadCLOGPage (ClogCtl -> page_number [slotno ],
965
+ InvalidTransactionId );
861
966
else
862
967
WriteCLOGPage (slotno );
863
968
gotorestart ;
@@ -886,7 +991,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
886
991
887
992
cldir = opendir (ClogDir );
888
993
if (cldir == NULL )
889
- elog (PANIC ,"could not open transaction-commit log directory (%s): %m" ,
994
+ elog (ERROR ,"could not open transaction-commit log directory (%s): %m" ,
890
995
ClogDir );
891
996
892
997
errno = 0 ;
@@ -911,7 +1016,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
911
1016
errno = 0 ;
912
1017
}
913
1018
if (errno )
914
- elog (PANIC ,"could not read transaction-commit log directory (%s): %m" ,
1019
+ elog (ERROR ,"could not read transaction-commit log directory (%s): %m" ,
915
1020
ClogDir );
916
1021
closedir (cldir );
917
1022