Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 22db526

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent 95695c7 commit 22db526

File tree

4 files changed

+285
-16
lines changed

4 files changed

+285
-16
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,9 @@ static TimeLineID receiveTLI = 0;
217217
staticboollastFullPageWrites;
218218

219219
/*
220-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
221-
* known, need to check the shared state".
220+
* Local copy of the state tracked by SharedRecoveryState in shared memory,
221+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
222+
* means "not known, need to check the shared state".
222223
*/
223224
staticboolLocalRecoveryInProgress= true;
224225

@@ -643,10 +644,10 @@ typedef struct XLogCtlData
643644
TimeLineIDPrevTimeLineID;
644645

645646
/*
646-
*SharedRecoveryInProgress indicates if we're still in crash or archive
647+
*SharedRecoveryState indicates if we're still in crash or archive
647648
* recovery. Protected by info_lck.
648649
*/
649-
boolSharedRecoveryInProgress;
650+
RecoveryStateSharedRecoveryState;
650651

651652
/*
652653
* SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4357,6 +4358,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
43574358
updateMinRecoveryPoint= true;
43584359

43594360
UpdateControlFile();
4361+
4362+
/*
4363+
* We update SharedRecoveryState while holding the lock on
4364+
* ControlFileLock so both states are consistent in shared
4365+
* memory.
4366+
*/
4367+
SpinLockAcquire(&XLogCtl->info_lck);
4368+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
4369+
SpinLockRelease(&XLogCtl->info_lck);
4370+
43604371
LWLockRelease(ControlFileLock);
43614372

43624373
CheckRecoveryConsistency();
@@ -5069,7 +5080,7 @@ XLOGShmemInit(void)
50695080
* in additional info.)
50705081
*/
50715082
XLogCtl->XLogCacheBlck=XLOGbuffers-1;
5072-
XLogCtl->SharedRecoveryInProgress=true;
5083+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
50735084
XLogCtl->SharedHotStandbyActive= false;
50745085
XLogCtl->WalWriterSleeping= false;
50755086

@@ -6758,7 +6769,13 @@ StartupXLOG(void)
67586769
*/
67596770
dbstate_at_startup=ControlFile->state;
67606771
if (InArchiveRecovery)
6772+
{
67616773
ControlFile->state=DB_IN_ARCHIVE_RECOVERY;
6774+
6775+
SpinLockAcquire(&XLogCtl->info_lck);
6776+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
6777+
SpinLockRelease(&XLogCtl->info_lck);
6778+
}
67626779
else
67636780
{
67646781
ereport(LOG,
@@ -6771,6 +6788,10 @@ StartupXLOG(void)
67716788
ControlFile->checkPointCopy.ThisTimeLineID,
67726789
recoveryTargetTLI)));
67736790
ControlFile->state=DB_IN_CRASH_RECOVERY;
6791+
6792+
SpinLockAcquire(&XLogCtl->info_lck);
6793+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
6794+
SpinLockRelease(&XLogCtl->info_lck);
67746795
}
67756796
ControlFile->checkPoint=checkPointLoc;
67766797
ControlFile->checkPointCopy=checkPoint;
@@ -7785,7 +7806,7 @@ StartupXLOG(void)
77857806
ControlFile->time= (pg_time_t)time(NULL);
77867807

77877808
SpinLockAcquire(&XLogCtl->info_lck);
7788-
XLogCtl->SharedRecoveryInProgress=false;
7809+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_DONE;
77897810
SpinLockRelease(&XLogCtl->info_lck);
77907811

77917812
UpdateControlFile();
@@ -7931,7 +7952,7 @@ RecoveryInProgress(void)
79317952
*/
79327953
volatileXLogCtlData*xlogctl=XLogCtl;
79337954

7934-
LocalRecoveryInProgress=xlogctl->SharedRecoveryInProgress;
7955+
LocalRecoveryInProgress=(xlogctl->SharedRecoveryState!=RECOVERY_STATE_DONE);
79357956

79367957
/*
79377958
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -7943,8 +7964,8 @@ RecoveryInProgress(void)
79437964
{
79447965
/*
79457966
* If we just exited recovery, make sure we read TimeLineID and
7946-
* RedoRecPtr afterSharedRecoveryInProgress (for machines with
7947-
*weakmemory ordering).
7967+
* RedoRecPtr afterSharedRecoveryState (for machines with weak
7968+
* memory ordering).
79487969
*/
79497970
pg_memory_barrier();
79507971
InitXLOGAccess();
@@ -7960,6 +7981,24 @@ RecoveryInProgress(void)
79607981
}
79617982
}
79627983

7984+
/*
7985+
* Returns current recovery state from shared memory.
7986+
*
7987+
* This returned state is kept consistent with the contents of the control
7988+
* file. See details about the possible values of RecoveryState in xlog.h.
7989+
*/
7990+
RecoveryState
7991+
GetRecoveryState(void)
7992+
{
7993+
RecoveryStateretval;
7994+
7995+
SpinLockAcquire(&XLogCtl->info_lck);
7996+
retval=XLogCtl->SharedRecoveryState;
7997+
SpinLockRelease(&XLogCtl->info_lck);
7998+
7999+
returnretval;
8000+
}
8001+
79638002
/*
79648003
* Is HotStandby active yet? This is only important in special backends
79658004
* since normal backends won't ever be able to connect until this returns

‎src/backend/access/transam/xlogarchive.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -617,18 +617,25 @@ XLogArchiveCheckDone(const char *xlog)
617617
{
618618
chararchiveStatusPath[MAXPGPATH];
619619
structstatstat_buf;
620-
boolinRecovery=RecoveryInProgress();
620+
621+
/* The file is always deletable if archive_mode is "off". */
622+
if (!XLogArchivingActive())
623+
return true;
621624

622625
/*
623-
* The file is always deletable if archive_mode is "off". On standbys
624-
* archiving is disabled if archive_mode is "on", and enabled with
625-
* "always". On a primary, archiving is enabled if archive_mode is "on"
626-
* or "always".
626+
* During archive recovery, the file is deletable if archive_mode is not
627+
* "always".
627628
*/
628-
if (!((XLogArchivingActive()&& !inRecovery)||
629-
(XLogArchivingAlways()&&inRecovery)))
629+
if (!XLogArchivingAlways()&&
630+
GetRecoveryState()==RECOVERY_STATE_ARCHIVE)
630631
return true;
631632

633+
/*
634+
* At this point of the logic, note that we are either a primary with
635+
* archive_mode set to "on" or "always", or a standby with archive_mode
636+
* set to "always".
637+
*/
638+
632639
/* First check for .done --- this means archiver is done with it */
633640
StatusFilePath(archiveStatusPath,xlog,".done");
634641
if (stat(archiveStatusPath,&stat_buf)==0)

‎src/include/access/xlog.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ typedef enum WalLevel
164164
WAL_LEVEL_LOGICAL
165165
}WalLevel;
166166

167+
/* Recovery states */
168+
typedefenumRecoveryState
169+
{
170+
RECOVERY_STATE_CRASH=0,/* crash recovery */
171+
RECOVERY_STATE_ARCHIVE,/* archive recovery */
172+
RECOVERY_STATE_DONE/* currently in production */
173+
}RecoveryState;
174+
167175
externPGDLLIMPORTintwal_level;
168176

169177
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -277,6 +285,7 @@ extern const char *xlog_identify(uint8 info);
277285
externvoidissue_xlog_fsync(intfd,XLogSegNosegno);
278286

279287
externboolRecoveryInProgress(void);
288+
externRecoveryStateGetRecoveryState(void);
280289
externboolHotStandbyActive(void);
281290
externboolHotStandbyActiveInReplay(void);
282291
externboolXLogInsertAllowed(void);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp