88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.83 2001/04/02 23:20:24 tgl Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.84 2001/05/10 20:38:49 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -54,10 +54,9 @@ typedef struct _MdfdVec
5454int mdfd_flags ;/* fd status flags */
5555
5656/* these are the assigned bits in mdfd_flags: */
57- #define MDFD_FREE (1 << 0)/* unused entry */
57+ #define MDFD_FREE (1 << 0)/* unused entry */
5858
59- int mdfd_lstbcnt ;/* most recent block count */
60- int mdfd_nextFree ;/* next free vector */
59+ int mdfd_nextFree ;/* link to next freelist member, if free */
6160#ifndef LET_OS_MANAGE_FILESIZE
6261struct _MdfdVec * mdfd_chain ;/* for large relations */
6362#endif
@@ -164,7 +163,6 @@ mdcreate(Relation reln)
164163
165164Md_fdvec [vfd ].mdfd_vfd = fd ;
166165Md_fdvec [vfd ].mdfd_flags = (uint16 )0 ;
167- Md_fdvec [vfd ].mdfd_lstbcnt = 0 ;
168166#ifndef LET_OS_MANAGE_FILESIZE
169167Md_fdvec [vfd ].mdfd_chain = (MdfdVec * )NULL ;
170168#endif
@@ -225,52 +223,69 @@ mdunlink(RelFileNode rnode)
225223/*
226224 *mdextend() -- Add a block to the specified relation.
227225 *
226+ *The semantics are basically the same as mdwrite(): write at the
227+ *specified position. However, we are expecting to extend the
228+ *relation (ie, blocknum is the current EOF), and so in case of
229+ *failure we clean up by truncating.
230+ *
228231 *This routine returns SM_FAIL or SM_SUCCESS, with errno set as
229232 *appropriate.
233+ *
234+ * Note: this routine used to call mdnblocks() to get the block position
235+ * to write at, but that's pretty silly since the caller needs to know where
236+ * the block will be written, and accordingly must have done mdnblocks()
237+ * already. Might as well pass in the position and save a seek.
230238 */
231239int
232- mdextend (Relation reln ,char * buffer )
240+ mdextend (Relation reln ,BlockNumber blocknum , char * buffer )
233241{
234- long pos ,
235- nbytes ;
236- int nblocks ;
242+ long seekpos ;
243+ int nbytes ;
237244MdfdVec * v ;
238245
239- nblocks = mdnblocks (reln );
240- v = _mdfd_getseg (reln ,nblocks );
246+ v = _mdfd_getseg (reln ,blocknum );
241247
242- if ((pos = FileSeek (v -> mdfd_vfd ,0L ,SEEK_END ))< 0 )
243- return SM_FAIL ;
248+ #ifndef LET_OS_MANAGE_FILESIZE
249+ seekpos = (long ) (BLCKSZ * (blocknum %RELSEG_SIZE ));
250+ #ifdef DIAGNOSTIC
251+ if (seekpos >=BLCKSZ * RELSEG_SIZE )
252+ elog (FATAL ,"seekpos too big!" );
253+ #endif
254+ #else
255+ seekpos = (long ) (BLCKSZ * (blocknum ));
256+ #endif
244257
245- if (pos %BLCKSZ != 0 )/* the last block is incomplete */
246- {
247- pos -= pos %BLCKSZ ;
248- if (FileSeek (v -> mdfd_vfd ,pos ,SEEK_SET )< 0 )
249- return SM_FAIL ;
250- }
258+ /*
259+ * Note: because caller obtained blocknum by calling mdnblocks, which
260+ * did a seek(SEEK_END), this seek is often redundant and will be
261+ * optimized away by fd.c. It's not redundant, however, if there is a
262+ * partial page at the end of the file. In that case we want to try to
263+ * overwrite the partial page with a full page. It's also not redundant
264+ * if bufmgr.c had to dump another buffer of the same file to make room
265+ * for the new page's buffer.
266+ */
267+ if (FileSeek (v -> mdfd_vfd ,seekpos ,SEEK_SET )!= seekpos )
268+ return SM_FAIL ;
251269
252270if ((nbytes = FileWrite (v -> mdfd_vfd ,buffer ,BLCKSZ ))!= BLCKSZ )
253271{
254272if (nbytes > 0 )
255273{
256- FileTruncate (v -> mdfd_vfd ,pos );
257- FileSeek (v -> mdfd_vfd ,pos ,SEEK_SET );
274+ int save_errno = errno ;
275+
276+ /* Remove the partially-written page */
277+ FileTruncate (v -> mdfd_vfd ,seekpos );
278+ FileSeek (v -> mdfd_vfd ,seekpos ,SEEK_SET );
279+ errno = save_errno ;
258280}
259281return SM_FAIL ;
260282}
261283
262- /* try to keep the last block count current, though it's just a hint */
263284#ifndef LET_OS_MANAGE_FILESIZE
264- if ((v -> mdfd_lstbcnt = (++ nblocks %RELSEG_SIZE ))== 0 )
265- v -> mdfd_lstbcnt = RELSEG_SIZE ;
266-
267285#ifdef DIAGNOSTIC
268- if (_mdnblocks (v -> mdfd_vfd ,BLCKSZ )> RELSEG_SIZE
269- || v -> mdfd_lstbcnt > RELSEG_SIZE )
286+ if (_mdnblocks (v -> mdfd_vfd ,BLCKSZ )> RELSEG_SIZE )
270287elog (FATAL ,"segment too big!" );
271288#endif
272- #else
273- v -> mdfd_lstbcnt = ++ nblocks ;
274289#endif
275290
276291return SM_SUCCESS ;
@@ -319,12 +334,11 @@ mdopen(Relation reln)
319334
320335Md_fdvec [vfd ].mdfd_vfd = fd ;
321336Md_fdvec [vfd ].mdfd_flags = (uint16 )0 ;
322- Md_fdvec [vfd ].mdfd_lstbcnt = _mdnblocks (fd ,BLCKSZ );
323337#ifndef LET_OS_MANAGE_FILESIZE
324338Md_fdvec [vfd ].mdfd_chain = (MdfdVec * )NULL ;
325339
326340#ifdef DIAGNOSTIC
327- if (Md_fdvec [ vfd ]. mdfd_lstbcnt > RELSEG_SIZE )
341+ if (_mdnblocks ( fd , BLCKSZ ) > RELSEG_SIZE )
328342elog (FATAL ,"segment too big on relopen!" );
329343#endif
330344#endif
@@ -440,9 +454,12 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
440454status = SM_SUCCESS ;
441455if ((nbytes = FileRead (v -> mdfd_vfd ,buffer ,BLCKSZ ))!= BLCKSZ )
442456{
443- if (nbytes == 0 )
444- MemSet (buffer ,0 ,BLCKSZ );
445- else if (blocknum == 0 && nbytes > 0 && mdnblocks (reln )== 0 )
457+ /*
458+ * If we are at EOF, return zeroes without complaining.
459+ * (XXX Is this still necessary/a good idea??)
460+ */
461+ if (nbytes == 0 ||
462+ (nbytes > 0 && mdnblocks (reln )== blocknum ))
446463MemSet (buffer ,0 ,BLCKSZ );
447464else
448465status = SM_FAIL ;
@@ -459,7 +476,6 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
459476int
460477mdwrite (Relation reln ,BlockNumber blocknum ,char * buffer )
461478{
462- int status ;
463479long seekpos ;
464480MdfdVec * v ;
465481
@@ -478,11 +494,10 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
478494if (FileSeek (v -> mdfd_vfd ,seekpos ,SEEK_SET )!= seekpos )
479495return SM_FAIL ;
480496
481- status = SM_SUCCESS ;
482497if (FileWrite (v -> mdfd_vfd ,buffer ,BLCKSZ )!= BLCKSZ )
483- status = SM_FAIL ;
498+ return SM_FAIL ;
484499
485- return status ;
500+ return SM_SUCCESS ;
486501}
487502
488503/*
@@ -662,31 +677,29 @@ mdnblocks(Relation reln)
662677nblocks = _mdnblocks (v -> mdfd_vfd ,BLCKSZ );
663678if (nblocks > RELSEG_SIZE )
664679elog (FATAL ,"segment too big in mdnblocks!" );
665- v -> mdfd_lstbcnt = nblocks ;
666- if (nblocks == RELSEG_SIZE )
667- {
668- segno ++ ;
680+ if (nblocks < RELSEG_SIZE )
681+ return (segno * RELSEG_SIZE )+ nblocks ;
682+ /*
683+ * If segment is exactly RELSEG_SIZE, advance to next one.
684+ */
685+ segno ++ ;
669686
687+ if (v -> mdfd_chain == (MdfdVec * )NULL )
688+ {
689+ /*
690+ * Because we pass O_CREAT, we will create the next
691+ * segment (with zero length) immediately, if the last
692+ * segment is of length RELSEG_SIZE. This is unnecessary
693+ * but harmless, and testing for the case would take more
694+ * cycles than it seems worth.
695+ */
696+ v -> mdfd_chain = _mdfd_openseg (reln ,segno ,O_CREAT );
670697if (v -> mdfd_chain == (MdfdVec * )NULL )
671- {
672-
673- /*
674- * Because we pass O_CREAT, we will create the next
675- * segment (with zero length) immediately, if the last
676- * segment is of length REL_SEGSIZE. This is unnecessary
677- * but harmless, and testing for the case would take more
678- * cycles than it seems worth.
679- */
680- v -> mdfd_chain = _mdfd_openseg (reln ,segno ,O_CREAT );
681- if (v -> mdfd_chain == (MdfdVec * )NULL )
682- elog (ERROR ,"cannot count blocks for %s -- open failed: %m" ,
683- RelationGetRelationName (reln ));
684- }
685-
686- v = v -> mdfd_chain ;
698+ elog (ERROR ,"cannot count blocks for %s -- open failed: %m" ,
699+ RelationGetRelationName (reln ));
687700}
688- else
689- return ( segno * RELSEG_SIZE ) + nblocks ;
701+
702+ v = v -> mdfd_chain ;
690703}
691704#else
692705return _mdnblocks (v -> mdfd_vfd ,BLCKSZ );
@@ -761,7 +774,6 @@ mdtruncate(Relation reln, int nblocks)
761774
762775if (FileTruncate (v -> mdfd_vfd ,lastsegblocks * BLCKSZ )< 0 )
763776return -1 ;
764- v -> mdfd_lstbcnt = lastsegblocks ;
765777v = v -> mdfd_chain ;
766778ov -> mdfd_chain = (MdfdVec * )NULL ;
767779}
@@ -779,7 +791,6 @@ mdtruncate(Relation reln, int nblocks)
779791#else
780792if (FileTruncate (v -> mdfd_vfd ,nblocks * BLCKSZ )< 0 )
781793return -1 ;
782- v -> mdfd_lstbcnt = nblocks ;
783794#endif
784795
785796return nblocks ;
@@ -958,13 +969,12 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
958969/* fill the entry */
959970v -> mdfd_vfd = fd ;
960971v -> mdfd_flags = (uint16 )0 ;
961- v -> mdfd_lstbcnt = _mdnblocks (fd ,BLCKSZ );
962972#ifndef LET_OS_MANAGE_FILESIZE
963973v -> mdfd_chain = (MdfdVec * )NULL ;
964974
965975#ifdef DIAGNOSTIC
966- if (v -> mdfd_lstbcnt > RELSEG_SIZE )
967- elog (FATAL ,"segment too big onopen !" );
976+ if (_mdnblocks ( fd , BLCKSZ ) > RELSEG_SIZE )
977+ elog (FATAL ,"segment too big onopenseg !" );
968978#endif
969979#endif
970980