Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4e87c48

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent 3436c5e, commit 4e87c48

File tree

5 files changed

+286
-16
lines changed

5 files changed

+286
-16
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 48 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -221,8 +221,9 @@ static TimeLineID receiveTLI = 0;
221221
staticboollastFullPageWrites;
222222

223223
/*
224-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
225-
* known, need to check the shared state".
224+
* Local copy of the state tracked by SharedRecoveryState in shared memory,
225+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
226+
* means "not known, need to check the shared state".
226227
*/
227228
staticboolLocalRecoveryInProgress= true;
228229

@@ -653,10 +654,10 @@ typedef struct XLogCtlData
653654
TimeLineIDPrevTimeLineID;
654655

655656
/*
656-
*SharedRecoveryInProgress indicates if we're still in crash or archive
657+
*SharedRecoveryState indicates if we're still in crash or archive
657658
* recovery. Protected by info_lck.
658659
*/
659-
boolSharedRecoveryInProgress;
660+
RecoveryStateSharedRecoveryState;
660661

661662
/*
662663
* SharedHotStandbyActive indicates if we allow hot standby queries to be
@@ -4434,6 +4435,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
44344435
updateMinRecoveryPoint= true;
44354436

44364437
UpdateControlFile();
4438+
4439+
/*
4440+
* We update SharedRecoveryState while holding the lock on
4441+
* ControlFileLock so both states are consistent in shared
4442+
* memory.
4443+
*/
4444+
SpinLockAcquire(&XLogCtl->info_lck);
4445+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
4446+
SpinLockRelease(&XLogCtl->info_lck);
4447+
44374448
LWLockRelease(ControlFileLock);
44384449

44394450
CheckRecoveryConsistency();
@@ -5166,7 +5177,7 @@ XLOGShmemInit(void)
51665177
* in additional info.)
51675178
*/
51685179
XLogCtl->XLogCacheBlck=XLOGbuffers-1;
5169-
XLogCtl->SharedRecoveryInProgress=true;
5180+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
51705181
XLogCtl->SharedHotStandbyActive= false;
51715182
XLogCtl->SharedPromoteIsTriggered= false;
51725183
XLogCtl->WalWriterSleeping= false;
@@ -6871,7 +6882,13 @@ StartupXLOG(void)
68716882
*/
68726883
dbstate_at_startup=ControlFile->state;
68736884
if (InArchiveRecovery)
6885+
{
68746886
ControlFile->state=DB_IN_ARCHIVE_RECOVERY;
6887+
6888+
SpinLockAcquire(&XLogCtl->info_lck);
6889+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_ARCHIVE;
6890+
SpinLockRelease(&XLogCtl->info_lck);
6891+
}
68756892
else
68766893
{
68776894
ereport(LOG,
@@ -6884,6 +6901,10 @@ StartupXLOG(void)
68846901
ControlFile->checkPointCopy.ThisTimeLineID,
68856902
recoveryTargetTLI)));
68866903
ControlFile->state=DB_IN_CRASH_RECOVERY;
6904+
6905+
SpinLockAcquire(&XLogCtl->info_lck);
6906+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_CRASH;
6907+
SpinLockRelease(&XLogCtl->info_lck);
68876908
}
68886909
ControlFile->checkPoint=checkPointLoc;
68896910
ControlFile->checkPointCopy=checkPoint;
@@ -7911,7 +7932,7 @@ StartupXLOG(void)
79117932
ControlFile->time= (pg_time_t)time(NULL);
79127933

79137934
SpinLockAcquire(&XLogCtl->info_lck);
7914-
XLogCtl->SharedRecoveryInProgress=false;
7935+
XLogCtl->SharedRecoveryState=RECOVERY_STATE_DONE;
79157936
SpinLockRelease(&XLogCtl->info_lck);
79167937

79177938
UpdateControlFile();
@@ -8057,7 +8078,7 @@ RecoveryInProgress(void)
80578078
*/
80588079
volatileXLogCtlData*xlogctl=XLogCtl;
80598080

8060-
LocalRecoveryInProgress=xlogctl->SharedRecoveryInProgress;
8081+
LocalRecoveryInProgress=(xlogctl->SharedRecoveryState!=RECOVERY_STATE_DONE);
80618082

80628083
/*
80638084
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -8069,8 +8090,8 @@ RecoveryInProgress(void)
80698090
{
80708091
/*
80718092
* If we just exited recovery, make sure we read TimeLineID and
8072-
* RedoRecPtr afterSharedRecoveryInProgress (for machines with
8073-
*weakmemory ordering).
8093+
* RedoRecPtr afterSharedRecoveryState (for machines with weak
8094+
* memory ordering).
80748095
*/
80758096
pg_memory_barrier();
80768097
InitXLOGAccess();
@@ -8086,6 +8107,24 @@ RecoveryInProgress(void)
80868107
}
80878108
}
80888109

8110+
/*
8111+
* Returns current recovery state from shared memory.
8112+
*
8113+
* This returned state is kept consistent with the contents of the control
8114+
* file. See details about the possible values of RecoveryState in xlog.h.
8115+
*/
8116+
RecoveryState
8117+
GetRecoveryState(void)
8118+
{
8119+
RecoveryStateretval;
8120+
8121+
SpinLockAcquire(&XLogCtl->info_lck);
8122+
retval=XLogCtl->SharedRecoveryState;
8123+
SpinLockRelease(&XLogCtl->info_lck);
8124+
8125+
returnretval;
8126+
}
8127+
80898128
/*
80908129
* Is HotStandby active yet? This is only important in special backends
80918130
* since normal backends won't ever be able to connect until this returns

‎src/backend/access/transam/xlogarchive.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -572,18 +572,25 @@ XLogArchiveCheckDone(const char *xlog)
572572
{
573573
chararchiveStatusPath[MAXPGPATH];
574574
structstatstat_buf;
575-
boolinRecovery=RecoveryInProgress();
575+
576+
/* The file is always deletable if archive_mode is "off". */
577+
if (!XLogArchivingActive())
578+
return true;
576579

577580
/*
578-
* The file is always deletable if archive_mode is "off". On standbys
579-
* archiving is disabled if archive_mode is "on", and enabled with
580-
* "always". On a primary, archiving is enabled if archive_mode is "on"
581-
* or "always".
581+
* During archive recovery, the file is deletable if archive_mode is not
582+
* "always".
582583
*/
583-
if (!((XLogArchivingActive()&& !inRecovery)||
584-
(XLogArchivingAlways()&&inRecovery)))
584+
if (!XLogArchivingAlways()&&
585+
GetRecoveryState()==RECOVERY_STATE_ARCHIVE)
585586
return true;
586587

588+
/*
589+
* At this point of the logic, note that we are either a primary with
590+
* archive_mode set to "on" or "always", or a standby with archive_mode
591+
* set to "always".
592+
*/
593+
587594
/* First check for .done --- this means archiver is done with it */
588595
StatusFilePath(archiveStatusPath,xlog,".done");
589596
if (stat(archiveStatusPath,&stat_buf)==0)

‎src/include/access/xlog.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ typedef enum WalLevel
166166
WAL_LEVEL_LOGICAL
167167
}WalLevel;
168168

169+
/* Recovery states */
170+
typedefenumRecoveryState
171+
{
172+
RECOVERY_STATE_CRASH=0,/* crash recovery */
173+
RECOVERY_STATE_ARCHIVE,/* archive recovery */
174+
RECOVERY_STATE_DONE/* currently in production */
175+
}RecoveryState;
176+
169177
externPGDLLIMPORTintwal_level;
170178

171179
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -291,6 +299,7 @@ extern const char *xlog_identify(uint8 info);
291299
externvoidissue_xlog_fsync(intfd,XLogSegNosegno);
292300

293301
externboolRecoveryInProgress(void);
302+
externRecoveryStateGetRecoveryState(void);
294303
externboolHotStandbyActive(void);
295304
externboolHotStandbyActiveInReplay(void);
296305
externboolXLogInsertAllowed(void);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp