Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit675b771

Browse files
nmischmichaelpq
authored andcommitted
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup processKeepFileRestoredFromArchive() call can unlink the preallocated segment.If a CHECKPOINT sql command had elicited the restartpoint experiencingthe race condition, that sql command failed. Moreover, the restartpointomitted its log_checkpoints message and some inessential resourcereclamation. Prevent the ERROR by skipping open() of the segment.Since these consequences are so minor, no back-patch.This commit has been applied as of2b3e467 in v15 and newerversions. This is required on stable branches of v13 and v14 to fix aregression reported by Noah Misch, introduced by1f95181, causingspurious failures in archive recovery (neither streaming nor archiverecovery) with concurrent restartpoints. The backpatched versions ofthe patches have been aligned on these branches by me, Noah Misch is theauthor. Tests have been conducted by the both of us.Reported-by: Arun ThirupathiAuthor: Noah Misch <noah@leadboat.com>Discussion:https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.comDiscussion:https://postgr.es/m/20250306193013.36.nmisch@google.comBackpatch-through: 13
1 parent47d2d29 commit675b771

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,7 +2452,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24522452
boolispartialpage;
24532453
boollast_iteration;
24542454
boolfinishing_seg;
2455-
booladded;
24562455
intcurridx;
24572456
intnpages;
24582457
intstartidx;
@@ -2518,7 +2517,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25182517
wal_segment_size);
25192518

25202519
/* create/use new log file */
2521-
openLogFile=XLogFileInit(openLogSegNo,&added);
2520+
openLogFile=XLogFileInit(openLogSegNo);
25222521
ReserveExternalFD();
25232522
}
25242523

@@ -3283,23 +3282,21 @@ XLogNeedsFlush(XLogRecPtr record)
32833282
}
32843283

32853284
/*
3286-
*Create a new XLOG file segment, or open a pre-existing one.
3285+
*Try to make a given XLOG file segment exist.
32873286
*
3288-
* logsegno: identify segment to be created/opened.
3287+
* logsegno: identify segment.
32893288
*
32903289
* *added: on return, true if this call raised the number of extant segments.
32913290
*
3292-
*Returns FD of opened file.
3291+
*path: on return, this char[MAXPGPATH] has the path to the logsegno file.
32933292
*
3294-
* Note: errors here are ERROR not PANIC because we might or might not be
3295-
* inside a critical section (eg, during checkpoint there is no reason to
3296-
* take down the system on failure). They will promote to PANIC if we are
3297-
* in a critical section.
3293+
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
3294+
* wanting an open segment should attempt to open "path", which usually will
3295+
* succeed. (This is weird, but it's efficient for the callers.)
32983296
*/
3299-
int
3300-
XLogFileInit(XLogSegNologsegno,bool*added)
3297+
staticint
3298+
XLogFileInitInternal(XLogSegNologsegno,bool*added,char*path)
33013299
{
3302-
charpath[MAXPGPATH];
33033300
chartmppath[MAXPGPATH];
33043301
PGAlignedXLogBlockzbuffer;
33053302
XLogSegNoinstalled_segno;
@@ -3452,26 +3449,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
34523449
*/
34533450
max_segno=logsegno+CheckPointSegments;
34543451
if (InstallXLogFileSegment(&installed_segno,tmppath, true,max_segno))
3452+
{
34553453
*added= true;
3454+
elog(DEBUG2,"done creating and filling new WAL file");
3455+
}
34563456
else
34573457
{
34583458
/*
34593459
* No need for any more future segments, or InstallXLogFileSegment()
3460-
* failed to rename the file into place. If the rename failed,opening
3461-
* the filebelow will fail.
3460+
* failed to rename the file into place. If the rename failed,a
3461+
*caller openingthe filemay fail.
34623462
*/
34633463
unlink(tmppath);
3464+
elog(DEBUG2,"abandoned new WAL file");
34643465
}
34653466

3467+
return-1;
3468+
}
3469+
3470+
/*
3471+
* Create a new XLOG file segment, or open a pre-existing one.
3472+
*
3473+
* logsegno: identify segment to be created/opened.
3474+
*
3475+
* Returns FD of opened file.
3476+
*
3477+
* Note: errors here are ERROR not PANIC because we might or might not be
3478+
* inside a critical section (eg, during checkpoint there is no reason to
3479+
* take down the system on failure). They will promote to PANIC if we are
3480+
* in a critical section.
3481+
*/
3482+
int
3483+
XLogFileInit(XLogSegNologsegno)
3484+
{
3485+
boolignore_added;
3486+
charpath[MAXPGPATH];
3487+
intfd;
3488+
3489+
fd=XLogFileInitInternal(logsegno,&ignore_added,path);
3490+
if (fd >=0)
3491+
returnfd;
3492+
34663493
/* Now open original target segment (might not be file I just made) */
34673494
fd=BasicOpenFile(path,O_RDWR |PG_BINARY |get_sync_bit(sync_method));
34683495
if (fd<0)
34693496
ereport(ERROR,
34703497
(errcode_for_file_access(),
34713498
errmsg("could not open file \"%s\": %m",path)));
3472-
3473-
elog(DEBUG2,"done creating and filling new WAL file");
3474-
34753499
returnfd;
34763500
}
34773501

@@ -3928,22 +3952,33 @@ XLogFileClose(void)
39283952
* High-volume systems will be OK once they've built up a sufficient set of
39293953
* recycled log segments, but the startup transient is likely to include
39303954
* a lot of segment creations by foreground processes, which is not so good.
3955+
*
3956+
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
3957+
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
3958+
* and/or ControlFile updates already completed. If a RequestCheckpoint()
3959+
* initiated the present checkpoint and an ERROR ends this function, the
3960+
* command that called RequestCheckpoint() fails. That's not ideal, but it's
3961+
* not worth contorting more functions to use caller-specified elevel values.
3962+
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
3963+
* reporting and resource reclamation.)
39313964
*/
39323965
staticvoid
39333966
PreallocXlogFiles(XLogRecPtrendptr)
39343967
{
39353968
XLogSegNo_logSegNo;
39363969
intlf;
39373970
booladded;
3971+
charpath[MAXPGPATH];
39383972
uint64offset;
39393973

39403974
XLByteToPrevSeg(endptr,_logSegNo,wal_segment_size);
39413975
offset=XLogSegmentOffset(endptr-1,wal_segment_size);
39423976
if (offset >= (uint32) (0.75*wal_segment_size))
39433977
{
39443978
_logSegNo++;
3945-
lf=XLogFileInit(_logSegNo,&added);
3946-
close(lf);
3979+
lf=XLogFileInitInternal(_logSegNo,&added,path);
3980+
if (lf >=0)
3981+
close(lf);
39473982
if (added)
39483983
CheckpointStats.ckpt_segs_added++;
39493984
}
@@ -5258,7 +5293,6 @@ BootStrapXLOG(void)
52585293
XLogLongPageHeaderlongpage;
52595294
XLogRecord*record;
52605295
char*recptr;
5261-
booladded;
52625296
uint64sysidentifier;
52635297
structtimevaltv;
52645298
pg_crc32ccrc;
@@ -5355,7 +5389,7 @@ BootStrapXLOG(void)
53555389
record->xl_crc=crc;
53565390

53575391
/* Create first XLOG segment file */
5358-
openLogFile=XLogFileInit(1,&added);
5392+
openLogFile=XLogFileInit(1);
53595393

53605394
/*
53615395
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5661,10 +5695,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
56615695
* The switch happened at a segment boundary, so just create the next
56625696
* segment on the new timeline.
56635697
*/
5664-
booladded;
56655698
intfd;
56665699

5667-
fd=XLogFileInit(startLogSegNo,&added);
5700+
fd=XLogFileInit(startLogSegNo);
56685701

56695702
if (close(fd)!=0)
56705703
{

‎src/backend/replication/walreceiver.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -890,11 +890,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
890890

891891
if (recvFile<0)
892892
{
893-
booladded= true;
894-
895893
/* Create/use new log file */
896894
XLByteToSeg(recptr,recvSegNo,wal_segment_size);
897-
recvFile=XLogFileInit(recvSegNo,&added);
895+
recvFile=XLogFileInit(recvSegNo);
898896
recvFileTLI=ThisTimeLineID;
899897
}
900898

‎src/include/access/xlog.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
296296
externvoidXLogFlush(XLogRecPtrRecPtr);
297297
externboolXLogBackgroundFlush(void);
298298
externboolXLogNeedsFlush(XLogRecPtrRecPtr);
299-
externintXLogFileInit(XLogSegNosegno,bool*added);
299+
externintXLogFileInit(XLogSegNosegno);
300300
externintXLogFileOpen(XLogSegNosegno);
301301

302302
externvoidCheckXLogRemoved(XLogSegNosegno,TimeLineIDtli);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp