Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2d24ca0

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent 34b7020 commit 2d24ca0

File tree

4 files changed

+285
-16
lines changed

4 files changed

+285
-16
lines changed

‎src/backend/access/transam/xlog.c‎

Lines changed: 48 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -211,8 +211,9 @@ static TimeLineID receiveTLI = 0;
211211
staticboollastFullPageWrites;
212212

213213
/*
214-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
215-
* known, need to check the shared state".
214+
* Local copy of the state tracked by SharedRecoveryState in shared memory,
215+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
216+
* means "not known, need to check the shared state".
216217
*/
217218
staticboolLocalRecoveryInProgress= true;
218219

@@ -637,10 +638,10 @@ typedef struct XLogCtlData
637638
chararchiveCleanupCommand[MAXPGPATH];
638639

639640
/*
640-
* SharedRecoveryInProgress indicates if we're still in crash or archive
641+
* SharedRecoveryState indicates if we're still in crash or archive
641642
* recovery. Protected by info_lck.
642643
*/
643-
boolSharedRecoveryInProgress;
644+
RecoveryStateSharedRecoveryState;
644645

645646
/*
646647
* SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4306,6 +4307,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
43064307
updateMinRecoveryPoint= true;
43074308

43084309
UpdateControlFile();
4310+
4311+
/*
4312+
* We update SharedRecoveryState while holding the lock on
4313+
* ControlFileLock so both states are consistent in shared
4314+
* memory.
4315+
*/
4316+
SpinLockAcquire(&XLogCtl->info_lck);
4317+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
4318+
SpinLockRelease(&XLogCtl->info_lck);
4319+
43094320
LWLockRelease(ControlFileLock);
43104321

43114322
CheckRecoveryConsistency();
@@ -5047,7 +5058,7 @@ XLOGShmemInit(void)
50475058
* in additional info.)
50485059
*/
50495060
XLogCtl->XLogCacheBlck=XLOGbuffers-1;
5050-
XLogCtl->SharedRecoveryInProgress=true;
5061+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
50515062
XLogCtl->SharedHotStandbyActive= false;
50525063
XLogCtl->WalWriterSleeping= false;
50535064

@@ -6859,7 +6870,13 @@ StartupXLOG(void)
68596870
*/
68606871
dbstate_at_startup=ControlFile->state;
68616872
if (InArchiveRecovery)
6873+
{
68626874
ControlFile->state=DB_IN_ARCHIVE_RECOVERY;
6875+
6876+
SpinLockAcquire(&XLogCtl->info_lck);
6877+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
6878+
SpinLockRelease(&XLogCtl->info_lck);
6879+
}
68636880
else
68646881
{
68656882
ereport(LOG,
@@ -6872,6 +6889,10 @@ StartupXLOG(void)
68726889
ControlFile->checkPointCopy.ThisTimeLineID,
68736890
recoveryTargetTLI)));
68746891
ControlFile->state=DB_IN_CRASH_RECOVERY;
6892+
6893+
SpinLockAcquire(&XLogCtl->info_lck);
6894+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
6895+
SpinLockRelease(&XLogCtl->info_lck);
68756896
}
68766897
ControlFile->checkPoint=checkPointLoc;
68776898
ControlFile->checkPointCopy=checkPoint;
@@ -7896,7 +7917,7 @@ StartupXLOG(void)
78967917
ControlFile->time= (pg_time_t)time(NULL);
78977918

78987919
SpinLockAcquire(&XLogCtl->info_lck);
7899-
XLogCtl->SharedRecoveryInProgress=false;
7920+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_DONE;
79007921
SpinLockRelease(&XLogCtl->info_lck);
79017922

79027923
UpdateControlFile();
@@ -8042,7 +8063,7 @@ RecoveryInProgress(void)
80428063
*/
80438064
volatileXLogCtlData*xlogctl=XLogCtl;
80448065

8045-
LocalRecoveryInProgress=xlogctl->SharedRecoveryInProgress;
8066+
LocalRecoveryInProgress=(xlogctl->SharedRecoveryState!=RECOVERY_STATE_DONE);
80468067

80478068
/*
80488069
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -8054,8 +8075,8 @@ RecoveryInProgress(void)
80548075
{
80558076
/*
80568077
* If we just exited recovery, make sure we read TimeLineID and
8057-
* RedoRecPtr after SharedRecoveryInProgress (for machines with
8058-
* weak memory ordering).
8078+
* RedoRecPtr after SharedRecoveryState (for machines with weak
8079+
* memory ordering).
80598080
*/
80608081
pg_memory_barrier();
80618082
InitXLOGAccess();
@@ -8071,6 +8092,24 @@ RecoveryInProgress(void)
80718092
}
80728093
}
80738094

8095+
/*
8096+
* Returns current recovery state from shared memory.
8097+
*
8098+
* This returned state is kept consistent with the contents of the control
8099+
* file. See details about the possible values of RecoveryState in xlog.h.
8100+
*/
8101+
RecoveryState
8102+
GetRecoveryState(void)
8103+
{
8104+
RecoveryStateretval;
8105+
8106+
SpinLockAcquire(&XLogCtl->info_lck);
8107+
retval=XLogCtl->SharedRecoveryState;
8108+
SpinLockRelease(&XLogCtl->info_lck);
8109+
8110+
returnretval;
8111+
}
8112+
80748113
/*
80758114
* Is HotStandby active yet? This is only important in special backends
80768115
* since normal backends won't ever be able to connect until this returns

‎src/backend/access/transam/xlogarchive.c‎

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -610,18 +610,25 @@ XLogArchiveCheckDone(const char *xlog)
610610
{
611611
chararchiveStatusPath[MAXPGPATH];
612612
structstatstat_buf;
613-
boolinRecovery=RecoveryInProgress();
613+
614+
/* The file is always deletable if archive_mode is "off". */
615+
if (!XLogArchivingActive())
616+
return true;
614617

615618
/*
616-
* The file is always deletable if archive_mode is "off". On standbys
617-
* archiving is disabled if archive_mode is "on", and enabled with
618-
* "always". On a primary, archiving is enabled if archive_mode is "on"
619-
* or "always".
619+
* During archive recovery, the file is deletable if archive_mode is not
620+
* "always".
620621
*/
621-
if (!((XLogArchivingActive()&& !inRecovery)||
622-
(XLogArchivingAlways()&&inRecovery)))
622+
if (!XLogArchivingAlways()&&
623+
GetRecoveryState()==RECOVERY_STATE_ARCHIVE)
623624
return true;
624625

626+
/*
627+
* At this point of the logic, note that we are either a primary with
628+
* archive_mode set to "on" or "always", or a standby with archive_mode
629+
* set to "always".
630+
*/
631+
625632
/* First check for .done --- this means archiver is done with it */
626633
StatusFilePath(archiveStatusPath,xlog,".done");
627634
if (stat(archiveStatusPath,&stat_buf)==0)

‎src/include/access/xlog.h‎

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,14 @@ typedef enum WalLevel
129129
WAL_LEVEL_LOGICAL
130130
}WalLevel;
131131

132+
/* Recovery states */
133+
typedefenumRecoveryState
134+
{
135+
RECOVERY_STATE_CRASH=0,/* crash recovery */
136+
RECOVERY_STATE_ARCHIVE,/* archive recovery */
137+
RECOVERY_STATE_DONE/* currently in production */
138+
}RecoveryState;
139+
132140
externPGDLLIMPORTintwal_level;
133141

134142
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -243,6 +251,7 @@ extern const char *xlog_identify(uint8 info);
243251
externvoidissue_xlog_fsync(intfd,XLogSegNosegno);
244252

245253
externboolRecoveryInProgress(void);
254+
externRecoveryStateGetRecoveryState(void);
246255
externboolHotStandbyActive(void);
247256
externboolHotStandbyActiveInReplay(void);
248257
externboolXLogInsertAllowed(void);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp