Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitcbed472

Browse files
nmischmichaelpq
authored andcommitted
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup processKeepFileRestoredFromArchive() call can unlink the preallocated segment.If a CHECKPOINT sql command had elicited the restartpoint experiencingthe race condition, that sql command failed. Moreover, the restartpointomitted its log_checkpoints message and some inessential resourcereclamation. Prevent the ERROR by skipping open() of the segment.Since these consequences are so minor, no back-patch.This commit has been applied as of2b3e467 in v15 and newerversions. This is required on stable branches of v13 and v14 to fix aregression reported by Noah Misch, introduced by1f95181, causingspurious failures in archive recovery (neither streaming nor archiverecovery) with concurrent restartpoints. The backpatched versions ofthe patches have been aligned on these branches by me, Noah Misch is theauthor. Tests have been conducted by the both of us.Reported-by: Arun ThirupathiAuthor: Noah Misch <noah@leadboat.com>Discussion:https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.comDiscussion:https://postgr.es/m/20250306193013.36.nmisch@google.comBackpatch-through: 13
1 parente77d9cd commitcbed472

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2440,7 +2440,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24402440
boolispartialpage;
24412441
boollast_iteration;
24422442
boolfinishing_seg;
2443-
booladded;
24442443
intcurridx;
24452444
intnpages;
24462445
intstartidx;
@@ -2507,7 +2506,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25072506
wal_segment_size);
25082507

25092508
/* create/use new log file */
2510-
openLogFile=XLogFileInit(openLogSegNo,&added);
2509+
openLogFile=XLogFileInit(openLogSegNo);
25112510
ReserveExternalFD();
25122511
}
25132512

@@ -3253,23 +3252,21 @@ XLogNeedsFlush(XLogRecPtr record)
32533252
}
32543253

32553254
/*
3256-
*Create a new XLOG file segment, or open a pre-existing one.
3255+
*Try to make a given XLOG file segment exist.
32573256
*
3258-
* logsegno: identify segment to be created/opened.
3257+
* logsegno: identify segment.
32593258
*
32603259
* *added: on return, true if this call raised the number of extant segments.
32613260
*
3262-
*Returns FD of opened file.
3261+
*path: on return, this char[MAXPGPATH] has the path to the logsegno file.
32633262
*
3264-
* Note: errors here are ERROR not PANIC because we might or might not be
3265-
* inside a critical section (eg, during checkpoint there is no reason to
3266-
* take down the system on failure). They will promote to PANIC if we are
3267-
* in a critical section.
3263+
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
3264+
* wanting an open segment should attempt to open "path", which usually will
3265+
* succeed. (This is weird, but it's efficient for the callers.)
32683266
*/
3269-
int
3270-
XLogFileInit(XLogSegNologsegno,bool*added)
3267+
staticint
3268+
XLogFileInitInternal(XLogSegNologsegno,bool*added,char*path)
32713269
{
3272-
charpath[MAXPGPATH];
32733270
chartmppath[MAXPGPATH];
32743271
PGAlignedXLogBlockzbuffer;
32753272
XLogSegNoinstalled_segno;
@@ -3407,26 +3404,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
34073404
*/
34083405
max_segno=logsegno+CheckPointSegments;
34093406
if (InstallXLogFileSegment(&installed_segno,tmppath, true,max_segno))
3407+
{
34103408
*added= true;
3409+
elog(DEBUG2,"done creating and filling new WAL file");
3410+
}
34113411
else
34123412
{
34133413
/*
34143414
* No need for any more future segments, or InstallXLogFileSegment()
3415-
* failed to rename the file into place. If the rename failed,opening
3416-
* the filebelow will fail.
3415+
* failed to rename the file into place. If the rename failed,a
3416+
*caller openingthe filemay fail.
34173417
*/
34183418
unlink(tmppath);
3419+
elog(DEBUG2,"abandoned new WAL file");
34193420
}
34203421

3422+
return-1;
3423+
}
3424+
3425+
/*
3426+
* Create a new XLOG file segment, or open a pre-existing one.
3427+
*
3428+
* logsegno: identify segment to be created/opened.
3429+
*
3430+
* Returns FD of opened file.
3431+
*
3432+
* Note: errors here are ERROR not PANIC because we might or might not be
3433+
* inside a critical section (eg, during checkpoint there is no reason to
3434+
* take down the system on failure). They will promote to PANIC if we are
3435+
* in a critical section.
3436+
*/
3437+
int
3438+
XLogFileInit(XLogSegNologsegno)
3439+
{
3440+
boolignore_added;
3441+
charpath[MAXPGPATH];
3442+
intfd;
3443+
3444+
fd=XLogFileInitInternal(logsegno,&ignore_added,path);
3445+
if (fd >=0)
3446+
returnfd;
3447+
34213448
/* Now open original target segment (might not be file I just made) */
34223449
fd=BasicOpenFile(path,O_RDWR |PG_BINARY |get_sync_bit(sync_method));
34233450
if (fd<0)
34243451
ereport(ERROR,
34253452
(errcode_for_file_access(),
34263453
errmsg("could not open file \"%s\": %m",path)));
3427-
3428-
elog(DEBUG2,"done creating and filling new WAL file");
3429-
34303454
returnfd;
34313455
}
34323456

@@ -3883,22 +3907,33 @@ XLogFileClose(void)
38833907
* High-volume systems will be OK once they've built up a sufficient set of
38843908
* recycled log segments, but the startup transient is likely to include
38853909
* a lot of segment creations by foreground processes, which is not so good.
3910+
*
3911+
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
3912+
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
3913+
* and/or ControlFile updates already completed. If a RequestCheckpoint()
3914+
* initiated the present checkpoint and an ERROR ends this function, the
3915+
* command that called RequestCheckpoint() fails. That's not ideal, but it's
3916+
* not worth contorting more functions to use caller-specified elevel values.
3917+
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
3918+
* reporting and resource reclamation.)
38863919
*/
38873920
staticvoid
38883921
PreallocXlogFiles(XLogRecPtrendptr)
38893922
{
38903923
XLogSegNo_logSegNo;
38913924
intlf;
38923925
booladded;
3926+
charpath[MAXPGPATH];
38933927
uint64offset;
38943928

38953929
XLByteToPrevSeg(endptr,_logSegNo,wal_segment_size);
38963930
offset=XLogSegmentOffset(endptr-1,wal_segment_size);
38973931
if (offset >= (uint32) (0.75*wal_segment_size))
38983932
{
38993933
_logSegNo++;
3900-
lf=XLogFileInit(_logSegNo,&added);
3901-
close(lf);
3934+
lf=XLogFileInitInternal(_logSegNo,&added,path);
3935+
if (lf >=0)
3936+
close(lf);
39023937
if (added)
39033938
CheckpointStats.ckpt_segs_added++;
39043939
}
@@ -5212,7 +5247,6 @@ BootStrapXLOG(void)
52125247
XLogLongPageHeaderlongpage;
52135248
XLogRecord*record;
52145249
char*recptr;
5215-
booladded;
52165250
uint64sysidentifier;
52175251
structtimevaltv;
52185252
pg_crc32ccrc;
@@ -5309,7 +5343,7 @@ BootStrapXLOG(void)
53095343
record->xl_crc=crc;
53105344

53115345
/* Create first XLOG segment file */
5312-
openLogFile=XLogFileInit(1,&added);
5346+
openLogFile=XLogFileInit(1);
53135347

53145348
/*
53155349
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5615,10 +5649,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
56155649
* The switch happened at a segment boundary, so just create the next
56165650
* segment on the new timeline.
56175651
*/
5618-
booladded;
56195652
intfd;
56205653

5621-
fd=XLogFileInit(startLogSegNo,&added);
5654+
fd=XLogFileInit(startLogSegNo);
56225655

56235656
if (close(fd)!=0)
56245657
{

‎src/backend/replication/walreceiver.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -918,11 +918,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
918918

919919
if (recvFile<0)
920920
{
921-
booladded= true;
922-
923921
/* Create/use new log file */
924922
XLByteToSeg(recptr,recvSegNo,wal_segment_size);
925-
recvFile=XLogFileInit(recvSegNo,&added);
923+
recvFile=XLogFileInit(recvSegNo);
926924
recvFileTLI=ThisTimeLineID;
927925
}
928926

‎src/include/access/xlog.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
286286
externvoidXLogFlush(XLogRecPtrRecPtr);
287287
externboolXLogBackgroundFlush(void);
288288
externboolXLogNeedsFlush(XLogRecPtrRecPtr);
289-
externintXLogFileInit(XLogSegNosegno,bool*added);
289+
externintXLogFileInit(XLogSegNosegno);
290290
externintXLogFileOpen(XLogSegNosegno);
291291

292292
externvoidCheckXLogRemoved(XLogSegNosegno,TimeLineIDtli);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp