Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc2ba3fd

Browse files
committed
In basebackup.c, refactor to create read_file_data_into_buffer.
This further reduces the length and complexity of sendFile(),hopefully make it easier to understand and modify. In additionto moving some logic into a new function, I took this opportunityto make a few slight adjustments to sendFile() itself, includingrenaming the 'len' variable to 'bytes_done', since we use it to representthe number of bytes we've already handled so far, not the totallength of the file.Patch by me, reviewed by David Steele.Discussion:http://postgr.es/m/CA+TgmoYt5jXH4U6cu1dm9Oe2FTn1aae6hBNhZzJJjyjbE_zYig@mail.gmail.com
1 parent0531831 commitc2ba3fd

File tree

1 file changed

+133
-98
lines changed

1 file changed

+133
-98
lines changed

‎src/backend/backup/basebackup.c

Lines changed: 133 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
8383
staticboolsendFile(bbsink*sink,constchar*readfilename,constchar*tarfilename,
8484
structstat*statbuf,boolmissing_ok,Oiddboid,
8585
backup_manifest_info*manifest,constchar*spcoid);
86+
staticoff_tread_file_data_into_buffer(bbsink*sink,
87+
constchar*readfilename,intfd,
88+
off_toffset,size_tlength,
89+
BlockNumberblkno,
90+
boolverify_checksum,
91+
int*checksum_failures);
8692
staticboolverify_page_checksum(Pagepage,XLogRecPtrstart_lsn,
8793
BlockNumberblkno,
8894
uint16*expected_checksum);
@@ -1490,9 +1496,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
14901496
BlockNumberblkno=0;
14911497
intchecksum_failures=0;
14921498
off_tcnt;
1493-
inti;
1494-
pgoff_tlen=0;
1495-
char*page;
1499+
pgoff_tbytes_done=0;
14961500
intsegmentno=0;
14971501
char*segmentpath;
14981502
boolverify_checksum= false;
@@ -1514,6 +1518,12 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
15141518

15151519
_tarWriteHeader(sink,tarfilename,NULL,statbuf, false);
15161520

1521+
/*
1522+
* Checksums are verified in multiples of BLCKSZ, so the buffer length
1523+
* should be a multiple of the block size as well.
1524+
*/
1525+
Assert((sink->bbs_buffer_length %BLCKSZ)==0);
1526+
15171527
if (!noverify_checksums&&DataChecksumsEnabled())
15181528
{
15191529
char*filename;
@@ -1551,23 +1561,21 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
15511561
* for a base backup we can ignore such extended data. It will be restored
15521562
* from WAL.
15531563
*/
1554-
while (len<statbuf->st_size)
1564+
while (bytes_done<statbuf->st_size)
15551565
{
1556-
size_tremaining=statbuf->st_size-len;
1566+
size_tremaining=statbuf->st_size-bytes_done;
15571567

15581568
/* Try to read some more data. */
1559-
cnt=basebackup_read_file(fd,sink->bbs_buffer,
1560-
Min(sink->bbs_buffer_length,remaining),
1561-
len,readfilename, true);
1569+
cnt=read_file_data_into_buffer(sink,readfilename,fd,bytes_done,
1570+
remaining,
1571+
blkno+segmentno*RELSEG_SIZE,
1572+
verify_checksum,
1573+
&checksum_failures);
15621574

15631575
/*
1564-
* The checksums are verified at block level, so we iterate over the
1565-
* buffer in chunks of BLCKSZ, after making sure that
1566-
* TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1567-
* BLCKSZ bytes.
1576+
* If the amount of data we were able to read was not a multiple of
1577+
* BLCKSZ, we cannot verify checksums, which are block-level.
15681578
*/
1569-
Assert((sink->bbs_buffer_length %BLCKSZ)==0);
1570-
15711579
if (verify_checksum&& (cnt %BLCKSZ!=0))
15721580
{
15731581
ereport(WARNING,
@@ -1578,84 +1586,6 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
15781586
verify_checksum= false;
15791587
}
15801588

1581-
if (verify_checksum)
1582-
{
1583-
for (i=0;i<cnt /BLCKSZ;i++)
1584-
{
1585-
intreread_cnt;
1586-
uint16expected_checksum;
1587-
1588-
page=sink->bbs_buffer+BLCKSZ*i;
1589-
1590-
/* If the page is OK, go on to the next one. */
1591-
if (verify_page_checksum(page,sink->bbs_state->startptr,
1592-
blkno+i+segmentno*RELSEG_SIZE,
1593-
&expected_checksum))
1594-
continue;
1595-
1596-
/*
1597-
* Retry the block on the first failure. It's possible that
1598-
* we read the first 4K page of the block just before postgres
1599-
* updated the entire block so it ends up looking torn to us.
1600-
* If, before we retry the read, the concurrent write of the
1601-
* block finishes, the page LSN will be updated and we'll
1602-
* realize that we should ignore this block.
1603-
*
1604-
* There's no guarantee that this will actually happen,
1605-
* though: the torn write could take an arbitrarily long time
1606-
* to complete. Retrying multiple times wouldn't fix this
1607-
* problem, either, though it would reduce the chances of it
1608-
* happening in practice. The only real fix here seems to be
1609-
* to have some kind of interlock that allows us to wait until
1610-
* we can be certain that no write to the block is in
1611-
* progress. Since we don't have any such thing right now, we
1612-
* just do this and hope for the best.
1613-
*/
1614-
reread_cnt=
1615-
basebackup_read_file(fd,
1616-
sink->bbs_buffer+BLCKSZ*i,
1617-
BLCKSZ,len+BLCKSZ*i,
1618-
readfilename,
1619-
false);
1620-
if (reread_cnt==0)
1621-
{
1622-
/*
1623-
* If we hit end-of-file, a concurrent truncation must
1624-
* have occurred, so break out of this loop just as if the
1625-
* initial fread() returned 0. We'll drop through to the
1626-
* same code that handles that case. (We must fix up cnt
1627-
* first, though.)
1628-
*/
1629-
cnt=BLCKSZ*i;
1630-
break;
1631-
}
1632-
1633-
/* If the page now looks OK, go on to the next one. */
1634-
if (verify_page_checksum(page,sink->bbs_state->startptr,
1635-
blkno+i+segmentno*RELSEG_SIZE,
1636-
&expected_checksum))
1637-
continue;
1638-
1639-
/* Handle checksum failure. */
1640-
checksum_failures++;
1641-
if (checksum_failures <=5)
1642-
ereport(WARNING,
1643-
(errmsg("checksum verification failed in "
1644-
"file \"%s\", block %u: calculated "
1645-
"%X but expected %X",
1646-
readfilename,blkno+i,expected_checksum,
1647-
((PageHeader)page)->pd_checksum)));
1648-
if (checksum_failures==5)
1649-
ereport(WARNING,
1650-
(errmsg("further checksum verification "
1651-
"failures in file \"%s\" will not "
1652-
"be reported",readfilename)));
1653-
}
1654-
1655-
/* Update block number for next pass through the outer loop. */
1656-
blkno+=i;
1657-
}
1658-
16591589
/*
16601590
* If we hit end-of-file, a concurrent truncation must have occurred.
16611591
* That's not an error condition, because WAL replay will fix things
@@ -1664,21 +1594,23 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
16641594
if (cnt==0)
16651595
break;
16661596

1597+
/* Update block number and # of bytes done for next loop iteration. */
1598+
blkno+=cnt /BLCKSZ;
1599+
bytes_done+=cnt;
1600+
16671601
/* Archive the data we just read. */
16681602
bbsink_archive_contents(sink,cnt);
16691603

16701604
/* Also feed it to the checksum machinery. */
16711605
if (pg_checksum_update(&checksum_ctx,
16721606
(uint8*)sink->bbs_buffer,cnt)<0)
16731607
elog(ERROR,"could not update checksum of base backup");
1674-
1675-
len+=cnt;
16761608
}
16771609

16781610
/* If the file was truncated while we were sending it, pad it with zeros */
1679-
while (len<statbuf->st_size)
1611+
while (bytes_done<statbuf->st_size)
16801612
{
1681-
size_tremaining=statbuf->st_size-len;
1613+
size_tremaining=statbuf->st_size-bytes_done;
16821614
size_tnbytes=Min(sink->bbs_buffer_length,remaining);
16831615

16841616
MemSet(sink->bbs_buffer,0,nbytes);
@@ -1687,15 +1619,15 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
16871619
nbytes)<0)
16881620
elog(ERROR,"could not update checksum of base backup");
16891621
bbsink_archive_contents(sink,nbytes);
1690-
len+=nbytes;
1622+
bytes_done+=nbytes;
16911623
}
16921624

16931625
/*
16941626
* Pad to a block boundary, per tar format requirements. (This small piece
16951627
* of data is probably not worth throttling, and is not checksummed
16961628
* because it's not actually part of the file.)
16971629
*/
1698-
_tarWritePadding(sink,len);
1630+
_tarWritePadding(sink,bytes_done);
16991631

17001632
CloseTransientFile(fd);
17011633

@@ -1718,6 +1650,109 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
17181650
return true;
17191651
}
17201652

1653+
/*
1654+
* Read some more data from the file into the bbsink's buffer, verifying
1655+
* checksums as required.
1656+
*
1657+
* 'offset' is the file offset from which we should begin to read, and
1658+
* 'length' is the amount of data that should be read. The actual amount
1659+
* of data read will be less than the requested amount if the bbsink's
1660+
* buffer isn't big enough to hold it all, or if the underlying file has
1661+
* been truncated. The return value is the number of bytes actually read.
1662+
*
1663+
* 'blkno' is the block number of the first page in the bbsink's buffer
1664+
* relative to the start of the relation.
1665+
*
1666+
* 'verify_checksum' indicates whether we should try to verify checksums
1667+
* for the blocks we read. If we do this, we'll update *checksum_failures
1668+
* and issue warnings as appropriate.
1669+
*/
1670+
staticoff_t
1671+
read_file_data_into_buffer(bbsink*sink,constchar*readfilename,intfd,
1672+
off_toffset,size_tlength,BlockNumberblkno,
1673+
boolverify_checksum,int*checksum_failures)
1674+
{
1675+
off_tcnt;
1676+
inti;
1677+
char*page;
1678+
1679+
/* Try to read some more data. */
1680+
cnt=basebackup_read_file(fd,sink->bbs_buffer,
1681+
Min(sink->bbs_buffer_length,length),
1682+
offset,readfilename, true);
1683+
1684+
/* Can't verify checksums if read length is not a multiple of BLCKSZ. */
1685+
if (!verify_checksum|| (cnt %BLCKSZ)!=0)
1686+
returncnt;
1687+
1688+
/* Verify checksum for each block. */
1689+
for (i=0;i<cnt /BLCKSZ;i++)
1690+
{
1691+
intreread_cnt;
1692+
uint16expected_checksum;
1693+
1694+
page=sink->bbs_buffer+BLCKSZ*i;
1695+
1696+
/* If the page is OK, go on to the next one. */
1697+
if (verify_page_checksum(page,sink->bbs_state->startptr,blkno+i,
1698+
&expected_checksum))
1699+
continue;
1700+
1701+
/*
1702+
* Retry the block on the first failure. It's possible that we read
1703+
* the first 4K page of the block just before postgres updated the
1704+
* entire block so it ends up looking torn to us. If, before we retry
1705+
* the read, the concurrent write of the block finishes, the page LSN
1706+
* will be updated and we'll realize that we should ignore this block.
1707+
*
1708+
* There's no guarantee that this will actually happen, though: the
1709+
* torn write could take an arbitrarily long time to complete.
1710+
* Retrying multiple times wouldn't fix this problem, either, though
1711+
* it would reduce the chances of it happening in practice. The only
1712+
* real fix here seems to be to have some kind of interlock that
1713+
* allows us to wait until we can be certain that no write to the
1714+
* block is in progress. Since we don't have any such thing right now,
1715+
* we just do this and hope for the best.
1716+
*/
1717+
reread_cnt=
1718+
basebackup_read_file(fd,sink->bbs_buffer+BLCKSZ*i,
1719+
BLCKSZ,offset+BLCKSZ*i,
1720+
readfilename, false);
1721+
if (reread_cnt==0)
1722+
{
1723+
/*
1724+
* If we hit end-of-file, a concurrent truncation must have
1725+
* occurred, so reduce cnt to reflect only the blocks already
1726+
* processed and break out of this loop.
1727+
*/
1728+
cnt=BLCKSZ*i;
1729+
break;
1730+
}
1731+
1732+
/* If the page now looks OK, go on to the next one. */
1733+
if (verify_page_checksum(page,sink->bbs_state->startptr,blkno+i,
1734+
&expected_checksum))
1735+
continue;
1736+
1737+
/* Handle checksum failure. */
1738+
(*checksum_failures)++;
1739+
if (*checksum_failures <=5)
1740+
ereport(WARNING,
1741+
(errmsg("checksum verification failed in "
1742+
"file \"%s\", block %u: calculated "
1743+
"%X but expected %X",
1744+
readfilename,blkno+i,expected_checksum,
1745+
((PageHeader)page)->pd_checksum)));
1746+
if (*checksum_failures==5)
1747+
ereport(WARNING,
1748+
(errmsg("further checksum verification "
1749+
"failures in file \"%s\" will not "
1750+
"be reported",readfilename)));
1751+
}
1752+
1753+
returncnt;
1754+
}
1755+
17211756
/*
17221757
* Try to verify the checksum for the provided page, if it seems appropriate
17231758
* to do so.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp