@@ -83,6 +83,9 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
8383static bool sendFile (bbsink * sink ,const char * readfilename ,const char * tarfilename ,
8484struct stat * statbuf ,bool missing_ok ,Oid dboid ,
8585backup_manifest_info * manifest ,const char * spcoid );
86+ static bool verify_page_checksum (Page page ,XLogRecPtr start_lsn ,
87+ BlockNumber blkno ,
88+ uint16 * expected_checksum );
8689static void sendFileWithContent (bbsink * sink ,const char * filename ,
8790const char * content ,
8891backup_manifest_info * manifest );
@@ -1485,14 +1488,11 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
14851488{
14861489int fd ;
14871490BlockNumber blkno = 0 ;
1488- bool block_retry = false;
1489- uint16 checksum ;
14901491int checksum_failures = 0 ;
14911492off_t cnt ;
14921493int i ;
14931494pgoff_t len = 0 ;
14941495char * page ;
1495- PageHeader phdr ;
14961496int segmentno = 0 ;
14971497char * segmentpath ;
14981498bool verify_checksum = false;
@@ -1582,94 +1582,78 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
15821582{
15831583for (i = 0 ;i < cnt /BLCKSZ ;i ++ )
15841584{
1585+ int reread_cnt ;
1586+ uint16 expected_checksum ;
1587+
15851588page = sink -> bbs_buffer + BLCKSZ * i ;
15861589
1590+ /* If the page is OK, go on to the next one. */
1591+ if (verify_page_checksum (page ,sink -> bbs_state -> startptr ,
1592+ blkno + i + segmentno * RELSEG_SIZE ,
1593+ & expected_checksum ))
1594+ continue ;
1595+
15871596/*
1588- * Only check pages which have not been modified since the
1589- * start of the base backup. Otherwise, they might have been
1590- * written only halfway and the checksum would not be valid.
1591- * However, replaying WAL would reinstate the correct page in
1592- * this case. We also skip completely new pages, since they
1593- * don't have a checksum yet.
1597+ * Retry the block on the first failure. It's possible that
1598+ * we read the first 4K page of the block just before postgres
1599+ * updated the entire block so it ends up looking torn to us.
1600+ * If, before we retry the read, the concurrent write of the
1601+ * block finishes, the page LSN will be updated and we'll
1602+ * realize that we should ignore this block.
1603+ *
1604+ * There's no guarantee that this will actually happen,
1605+ * though: the torn write could take an arbitrarily long time
1606+ * to complete. Retrying multiple times wouldn't fix this
1607+ * problem, either, though it would reduce the chances of it
1608+ * happening in practice. The only real fix here seems to be
1609+ * to have some kind of interlock that allows us to wait until
1610+ * we can be certain that no write to the block is in
1611+ * progress. Since we don't have any such thing right now, we
1612+ * just do this and hope for the best.
15941613 */
1595- if (!PageIsNew (page )&& PageGetLSN (page )< sink -> bbs_state -> startptr )
1614+ reread_cnt =
1615+ basebackup_read_file (fd ,
1616+ sink -> bbs_buffer + BLCKSZ * i ,
1617+ BLCKSZ ,len + BLCKSZ * i ,
1618+ readfilename ,
1619+ false);
1620+ if (reread_cnt == 0 )
15961621{
1597- checksum = pg_checksum_page ((char * )page ,blkno + segmentno * RELSEG_SIZE );
1598- phdr = (PageHeader )page ;
1599- if (phdr -> pd_checksum != checksum )
1600- {
1601- /*
1602- * Retry the block on the first failure. It's
1603- * possible that we read the first 4K page of the
1604- * block just before postgres updated the entire block
1605- * so it ends up looking torn to us. If, before we
1606- * retry the read, the concurrent write of the block
1607- * finishes, the page LSN will be updated and we'll
1608- * realize that we should ignore this block.
1609- *
1610- * There's no guarantee that this will actually
1611- * happen, though: the torn write could take an
1612- * arbitrarily long time to complete. Retrying
1613- * multiple times wouldn't fix this problem, either,
1614- * though it would reduce the chances of it happening
1615- * in practice. The only real fix here seems to be to
1616- * have some kind of interlock that allows us to wait
1617- * until we can be certain that no write to the block
1618- * is in progress. Since we don't have any such thing
1619- * right now, we just do this and hope for the best.
1620- */
1621- if (block_retry == false)
1622- {
1623- int reread_cnt ;
1624-
1625- /* Reread the failed block */
1626- reread_cnt =
1627- basebackup_read_file (fd ,
1628- sink -> bbs_buffer + BLCKSZ * i ,
1629- BLCKSZ ,len + BLCKSZ * i ,
1630- readfilename ,
1631- false);
1632- if (reread_cnt == 0 )
1633- {
1634- /*
1635- * If we hit end-of-file, a concurrent
1636- * truncation must have occurred, so break out
1637- * of this loop just as if the initial fread()
1638- * returned 0. We'll drop through to the same
1639- * code that handles that case. (We must fix
1640- * up cnt first, though.)
1641- */
1642- cnt = BLCKSZ * i ;
1643- break ;
1644- }
1645-
1646- /* Set flag so we know a retry was attempted */
1647- block_retry = true;
1648-
1649- /* Reset loop to validate the block again */
1650- i -- ;
1651- continue ;
1652- }
1653-
1654- checksum_failures ++ ;
1655-
1656- if (checksum_failures <=5 )
1657- ereport (WARNING ,
1658- (errmsg ("checksum verification failed in "
1659- "file \"%s\", block %u: calculated "
1660- "%X but expected %X" ,
1661- readfilename ,blkno ,checksum ,
1662- phdr -> pd_checksum )));
1663- if (checksum_failures == 5 )
1664- ereport (WARNING ,
1665- (errmsg ("further checksum verification "
1666- "failures in file \"%s\" will not "
1667- "be reported" ,readfilename )));
1668- }
1622+ /*
1623+ * If we hit end-of-file, a concurrent truncation must
1624+ * have occurred, so break out of this loop just as if the
1625+ * initial fread() returned 0. We'll drop through to the
1626+ * same code that handles that case. (We must fix up cnt
1627+ * first, though.)
1628+ */
1629+ cnt = BLCKSZ * i ;
1630+ break ;
16691631}
1670- block_retry = false;
1671- blkno ++ ;
1632+
1633+ /* If the page now looks OK, go on to the next one. */
1634+ if (verify_page_checksum (page ,sink -> bbs_state -> startptr ,
1635+ blkno + i + segmentno * RELSEG_SIZE ,
1636+ & expected_checksum ))
1637+ continue ;
1638+
1639+ /* Handle checksum failure. */
1640+ checksum_failures ++ ;
1641+ if (checksum_failures <=5 )
1642+ ereport (WARNING ,
1643+ (errmsg ("checksum verification failed in "
1644+ "file \"%s\", block %u: calculated "
1645+ "%X but expected %X" ,
1646+ readfilename ,blkno + i ,expected_checksum ,
1647+ ((PageHeader )page )-> pd_checksum )));
1648+ if (checksum_failures == 5 )
1649+ ereport (WARNING ,
1650+ (errmsg ("further checksum verification "
1651+ "failures in file \"%s\" will not "
1652+ "be reported" ,readfilename )));
16721653}
1654+
1655+ /* Update block number for next pass through the outer loop. */
1656+ blkno += i ;
16731657}
16741658
16751659/*
@@ -1734,6 +1718,42 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
17341718return true;
17351719}
17361720
1721+ /*
1722+ * Try to verify the checksum for the provided page, if it seems appropriate
1723+ * to do so.
1724+ *
1725+ * Returns true if verification succeeds or if we decide not to check it,
1726+ * and false if verification fails. When return false, it also sets
1727+ * *expected_checksum to the computed value.
1728+ */
1729+ static bool
1730+ verify_page_checksum (Page page ,XLogRecPtr start_lsn ,BlockNumber blkno ,
1731+ uint16 * expected_checksum )
1732+ {
1733+ PageHeader phdr ;
1734+ uint16 checksum ;
1735+
1736+ /*
1737+ * Only check pages which have not been modified since the start of the
1738+ * base backup. Otherwise, they might have been written only halfway and
1739+ * the checksum would not be valid. However, replaying WAL would
1740+ * reinstate the correct page in this case. We also skip completely new
1741+ * pages, since they don't have a checksum yet.
1742+ */
1743+ if (PageIsNew (page )|| PageGetLSN (page ) >=start_lsn )
1744+ return true;
1745+
1746+ /* Perform the actual checksum calculation. */
1747+ checksum = pg_checksum_page (page ,blkno );
1748+
1749+ /* See whether it matches the value from the page. */
1750+ phdr = (PageHeader )page ;
1751+ if (phdr -> pd_checksum == checksum )
1752+ return true;
1753+ * expected_checksum = checksum ;
1754+ return false;
1755+ }
1756+
17371757static int64
17381758_tarWriteHeader (bbsink * sink ,const char * filename ,const char * linktarget ,
17391759struct stat * statbuf ,bool sizeonly )