Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 02657c4

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived. This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem. A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary. Reported-by: Benoît Lobréau. Author: Jehan-Guillaume de Rorthais. Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier. Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost Backpatch-through: 9.5
1 parent b0b2168 commit 02657c4

File tree

4 files changed

+305
-16
lines changed

4 files changed

+305
-16
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 48 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -209,8 +209,9 @@ static TimeLineID receiveTLI = 0;
209209
staticboollastFullPageWrites;
210210

211211
/*
212-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
213-
* known, need to check the shared state".
212+
* Local copy of the state tracked by SharedRecoveryState in shared memory,
213+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
214+
* means "not known, need to check the shared state".
214215
*/
215216
staticboolLocalRecoveryInProgress= true;
216217

@@ -635,10 +636,10 @@ typedef struct XLogCtlData
635636
chararchiveCleanupCommand[MAXPGPATH];
636637

637638
/*
638-
* SharedRecoveryInProgress indicates if we're still in crash or archive
639+
* SharedRecoveryState indicates if we're still in crash or archive
639640
* recovery. Protected by info_lck.
640641
*/
641-
boolSharedRecoveryInProgress;
642+
RecoveryStateSharedRecoveryState;
642643

643644
/*
644645
* SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4295,6 +4296,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
42954296
updateMinRecoveryPoint= true;
42964297

42974298
UpdateControlFile();
4299+
4300+
/*
4301+
* We update SharedRecoveryState while holding the lock on
4302+
* ControlFileLock so both states are consistent in shared
4303+
* memory.
4304+
*/
4305+
SpinLockAcquire(&XLogCtl->info_lck);
4306+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
4307+
SpinLockRelease(&XLogCtl->info_lck);
4308+
42984309
LWLockRelease(ControlFileLock);
42994310

43004311
CheckRecoveryConsistency();
@@ -4980,7 +4991,7 @@ XLOGShmemInit(void)
49804991
* in additional info.)
49814992
*/
49824993
XLogCtl->XLogCacheBlck=XLOGbuffers-1;
4983-
XLogCtl->SharedRecoveryInProgress=true;
4994+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
49844995
XLogCtl->SharedHotStandbyActive= false;
49854996
XLogCtl->WalWriterSleeping= false;
49864997

@@ -6803,7 +6814,13 @@ StartupXLOG(void)
68036814
*/
68046815
dbstate_at_startup=ControlFile->state;
68056816
if (InArchiveRecovery)
6817+
{
68066818
ControlFile->state=DB_IN_ARCHIVE_RECOVERY;
6819+
6820+
SpinLockAcquire(&XLogCtl->info_lck);
6821+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
6822+
SpinLockRelease(&XLogCtl->info_lck);
6823+
}
68076824
else
68086825
{
68096826
ereport(LOG,
@@ -6816,6 +6833,10 @@ StartupXLOG(void)
68166833
ControlFile->checkPointCopy.ThisTimeLineID,
68176834
recoveryTargetTLI)));
68186835
ControlFile->state=DB_IN_CRASH_RECOVERY;
6836+
6837+
SpinLockAcquire(&XLogCtl->info_lck);
6838+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
6839+
SpinLockRelease(&XLogCtl->info_lck);
68196840
}
68206841
ControlFile->prevCheckPoint=ControlFile->checkPoint;
68216842
ControlFile->checkPoint=checkPointLoc;
@@ -7841,7 +7862,7 @@ StartupXLOG(void)
78417862
ControlFile->time= (pg_time_t)time(NULL);
78427863

78437864
SpinLockAcquire(&XLogCtl->info_lck);
7844-
XLogCtl->SharedRecoveryInProgress=false;
7865+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_DONE;
78457866
SpinLockRelease(&XLogCtl->info_lck);
78467867

78477868
UpdateControlFile();
@@ -7987,7 +8008,7 @@ RecoveryInProgress(void)
79878008
*/
79888009
volatileXLogCtlData*xlogctl=XLogCtl;
79898010

7990-
LocalRecoveryInProgress=xlogctl->SharedRecoveryInProgress;
8011+
LocalRecoveryInProgress=(xlogctl->SharedRecoveryState!=RECOVERY_STATE_DONE);
79918012

79928013
/*
79938014
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -7999,8 +8020,8 @@ RecoveryInProgress(void)
79998020
{
80008021
/*
80018022
* If we just exited recovery, make sure we read TimeLineID and
8002-
* RedoRecPtr after SharedRecoveryInProgress (for machines with
8003-
* weak memory ordering).
8023+
* RedoRecPtr after SharedRecoveryState (for machines with weak
8024+
* memory ordering).
80048025
*/
80058026
pg_memory_barrier();
80068027
InitXLOGAccess();
@@ -8016,6 +8037,24 @@ RecoveryInProgress(void)
80168037
}
80178038
}
80188039

8040+
/*
8041+
* Returns current recovery state from shared memory.
8042+
*
8043+
* This returned state is kept consistent with the contents of the control
8044+
* file. See details about the possible values of RecoveryState in xlog.h.
8045+
*/
8046+
RecoveryState
8047+
GetRecoveryState(void)
8048+
{
8049+
RecoveryStateretval;
8050+
8051+
SpinLockAcquire(&XLogCtl->info_lck);
8052+
retval=XLogCtl->SharedRecoveryState;
8053+
SpinLockRelease(&XLogCtl->info_lck);
8054+
8055+
returnretval;
8056+
}
8057+
80198058
/*
80208059
* Is HotStandby active yet? This is only important in special backends
80218060
* since normal backends won't ever be able to connect until this returns

‎src/backend/access/transam/xlogarchive.c

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -608,18 +608,25 @@ XLogArchiveCheckDone(const char *xlog)
608608
{
609609
chararchiveStatusPath[MAXPGPATH];
610610
structstatstat_buf;
611-
boolinRecovery=RecoveryInProgress();
611+
612+
/* The file is always deletable if archive_mode is "off". */
613+
if (!XLogArchivingActive())
614+
return true;
612615

613616
/*
614-
* The file is always deletable if archive_mode is "off". On standbys
615-
* archiving is disabled if archive_mode is "on", and enabled with
616-
* "always". On a primary, archiving is enabled if archive_mode is "on"
617-
* or "always".
617+
* During archive recovery, the file is deletable if archive_mode is not
618+
* "always".
618619
*/
619-
if (!((XLogArchivingActive()&& !inRecovery)||
620-
(XLogArchivingAlways()&&inRecovery)))
620+
if (!XLogArchivingAlways()&&
621+
GetRecoveryState()==RECOVERY_STATE_ARCHIVE)
621622
return true;
622623

624+
/*
625+
* At this point of the logic, note that we are either a primary with
626+
* archive_mode set to "on" or "always", or a standby with archive_mode
627+
* set to "always".
628+
*/
629+
623630
/* First check for .done --- this means archiver is done with it */
624631
StatusFilePath(archiveStatusPath,xlog,".done");
625632
if (stat(archiveStatusPath,&stat_buf)==0)

‎src/include/access/xlog.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,14 @@ typedef enum WalLevel
128128
WAL_LEVEL_LOGICAL
129129
}WalLevel;
130130

131+
/* Recovery states */
132+
typedefenumRecoveryState
133+
{
134+
RECOVERY_STATE_CRASH=0,/* crash recovery */
135+
RECOVERY_STATE_ARCHIVE,/* archive recovery */
136+
RECOVERY_STATE_DONE/* currently in production */
137+
}RecoveryState;
138+
131139
externPGDLLIMPORTintwal_level;
132140

133141
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -242,6 +250,7 @@ extern const char *xlog_identify(uint8 info);
242250
externvoidissue_xlog_fsync(intfd,XLogSegNosegno);
243251

244252
externboolRecoveryInProgress(void);
253+
externRecoveryStateGetRecoveryState(void);
245254
externboolHotStandbyActive(void);
246255
externboolHotStandbyActiveInReplay(void);
247256
externboolXLogInsertAllowed(void);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp