Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 72a98a6

Browse files
committed
Don't open formally non-existent segments in _mdfd_getseg().
Before this commit _mdfd_getseg(), in contrast to mdnblocks(), did not verify whether all segments leading up to the to-be-opened one were RELSEG_SIZE sized. That is e.g. not the case after truncating a relation, because later segments just get truncated to zero length, not removed.

Once a "non-existent" segment has been opened in a session, mdnblocks() will return wrong results, causing errors like "could not read block %u in file" when accessing blocks. Closing the session, or the later arrival of relevant invalidation messages, would "fix" the problem.

That, so far, was mostly harmless, because most segment accesses are only done after an mdnblocks() call. But since 428b1d6 we try to open segments that might have been deleted, to trigger kernel writeback from a backend's queue of recent writes.

To fix, check segment sizes in _mdfd_getseg() when opening previously unopened segments. In practice this shouldn't imply a lot of additional lseek() calls, because mdnblocks() will most of the time already have opened all relevant segments.

This commit also fixes a second problem, namely that _mdfd_getseg(EXTENSION_RETURN_NULL) extends files during recovery, which is not desirable for the mdwriteback() case. Add EXTENSION_REALLY_RETURN_NULL, which does not behave that way, and use it.

Reported-By: Thom Brown
Author: Andres Freund, Abhijit Menon-Sen
Reviewed-By: Robert Haas, Fabien Coehlo
Discussion: CAA-aLv6Dp_ZsV-44QA-2zgkqWKQq=GedBX2dRSrWpxqovXK=Pg@mail.gmail.com
Fixes: 428b1d6
1 parent c6ff84b, commit 72a98a6

File tree

1 file changed

+69
-27
lines changed
  • src/backend/storage/smgr

1 file changed

+69
-27
lines changed

‎src/backend/storage/smgr/md.c

Lines changed: 69 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -165,9 +165,14 @@ static CycleCtr mdckpt_cycle_ctr = 0;
165165

166166
typedefenum/* behavior for mdopen & _mdfd_getseg */
167167
{
168-
EXTENSION_FAIL,/* ereport if segment not present */
169-
EXTENSION_RETURN_NULL,/* return NULL if not present */
170-
EXTENSION_CREATE/* create new segments as needed */
168+
/* ereport if segment not present, create in recovery */
169+
EXTENSION_FAIL,
170+
/* return NULL if not present, create in recovery */
171+
EXTENSION_RETURN_NULL,
172+
/* return NULL if not present */
173+
EXTENSION_REALLY_RETURN_NULL,
174+
/* create new segments as needed */
175+
EXTENSION_CREATE
171176
}ExtensionBehavior;
172177

173178
/* local routines */
@@ -591,7 +596,8 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
591596
fd=PathNameOpenFile(path,O_RDWR |O_CREAT |O_EXCL |PG_BINARY,0600);
592597
if (fd<0)
593598
{
594-
if (behavior==EXTENSION_RETURN_NULL&&
599+
if ((behavior==EXTENSION_RETURN_NULL||
600+
behavior==EXTENSION_REALLY_RETURN_NULL)&&
595601
FILE_POSSIBLY_DELETED(errno))
596602
{
597603
pfree(path);
@@ -685,7 +691,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
685691
segnum_end;
686692

687693
v=_mdfd_getseg(reln,forknum,blocknum, false,
688-
EXTENSION_RETURN_NULL);
694+
EXTENSION_REALLY_RETURN_NULL);
689695

690696
/*
691697
* We might be flushing buffers of already removed relations, that's
@@ -1774,7 +1780,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
17741780
BlockNumbernextsegno;
17751781

17761782
if (!v)
1777-
returnNULL;/*only possibleifEXTENSION_RETURN_NULL */
1783+
returnNULL;/* ifEXTENSION_(REALLY_)RETURN_NULL */
17781784

17791785
targetseg=blkno / ((BlockNumber)RELSEG_SIZE);
17801786
for (nextsegno=1;nextsegno <=targetseg;nextsegno++)
@@ -1783,23 +1789,34 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
17831789

17841790
if (v->mdfd_chain==NULL)
17851791
{
1786-
/*
1787-
* Normally we will create new segments only if authorized by the
1788-
* caller (i.e., we are doing mdextend()). But when doing WAL
1789-
* recovery, create segments anyway; this allows cases such as
1790-
* replaying WAL data that has a write into a high-numbered
1791-
* segment of a relation that was later deleted. We want to go
1792-
* ahead and create the segments so we can finish out the replay.
1793-
*
1794-
* We have to maintain the invariant that segments before the last
1795-
* active segment are of size RELSEG_SIZE; therefore, pad them out
1796-
* with zeroes if needed. (This only matters if caller is
1797-
* extending the relation discontiguously, but that can happen in
1798-
* hash indexes.)
1799-
*/
1800-
if (behavior==EXTENSION_CREATE||InRecovery)
1792+
BlockNumbernblocks=_mdnblocks(reln,forknum,v);
1793+
intflags=0;
1794+
1795+
if (nblocks> ((BlockNumber)RELSEG_SIZE))
1796+
elog(FATAL,"segment too big");
1797+
1798+
if (behavior==EXTENSION_CREATE||
1799+
(InRecovery&&behavior!=EXTENSION_REALLY_RETURN_NULL))
18011800
{
1802-
if (_mdnblocks(reln,forknum,v)<RELSEG_SIZE)
1801+
/*
1802+
* Normally we will create new segments only if authorized by
1803+
* the caller (i.e., we are doing mdextend()). But when doing
1804+
* WAL recovery, create segments anyway; this allows cases
1805+
* such as replaying WAL data that has a write into a
1806+
* high-numbered segment of a relation that was later deleted.
1807+
* We want to go ahead and create the segments so we can
1808+
* finish out the replay. However if the caller has specified
1809+
* EXTENSION_REALLY_RETURN_NULL, then extension is not desired
1810+
* even in recovery; we won't reach this point in that case.
1811+
*
1812+
* We have to maintain the invariant that segments before the
1813+
* last active segment are of size RELSEG_SIZE; therefore, if
1814+
* extending, pad them out with zeroes if needed. (This only
1815+
* matters if in recovery, or if the caller is extending the
1816+
* relation discontiguously, but that can happen in hash
1817+
* indexes.)
1818+
*/
1819+
if (nblocks< ((BlockNumber)RELSEG_SIZE))
18031820
{
18041821
char*zerobuf=palloc0(BLCKSZ);
18051822

@@ -1808,16 +1825,41 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
18081825
zerobuf,skipFsync);
18091826
pfree(zerobuf);
18101827
}
1811-
v->mdfd_chain=_mdfd_openseg(reln,forknum,+nextsegno,O_CREAT);
1828+
flags=O_CREAT;
18121829
}
1813-
else
1830+
elseif (nblocks< ((BlockNumber)RELSEG_SIZE))
18141831
{
1815-
/* We won't create segment if not existent */
1816-
v->mdfd_chain=_mdfd_openseg(reln,forknum,nextsegno,0);
1832+
/*
1833+
* When not extending, only open the next segment if the
1834+
* current one is exactly RELSEG_SIZE. If not (this branch),
1835+
* either return NULL or fail.
1836+
*/
1837+
if (behavior==EXTENSION_RETURN_NULL||
1838+
behavior==EXTENSION_REALLY_RETURN_NULL)
1839+
{
1840+
/*
1841+
* Some callers discern between reasons for _mdfd_getseg()
1842+
* returning NULL based on errno. As there's no failing
1843+
* syscall involved in this case, explicitly set errno to
1844+
* ENOENT, as that seems the closest interpretation.
1845+
*/
1846+
errno=ENOENT;
1847+
returnNULL;
1848+
}
1849+
1850+
ereport(ERROR,
1851+
(errcode_for_file_access(),
1852+
errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1853+
_mdfd_segpath(reln,forknum,nextsegno),
1854+
blkno,nblocks)));
18171855
}
1856+
1857+
v->mdfd_chain=_mdfd_openseg(reln,forknum,nextsegno,flags);
1858+
18181859
if (v->mdfd_chain==NULL)
18191860
{
1820-
if (behavior==EXTENSION_RETURN_NULL&&
1861+
if ((behavior==EXTENSION_RETURN_NULL||
1862+
behavior==EXTENSION_REALLY_RETURN_NULL)&&
18211863
FILE_POSSIBLY_DELETED(errno))
18221864
returnNULL;
18231865
ereport(ERROR,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp