Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fd4ced5

Browse files
Fast promote mode skips checkpoint at end of recovery.
pg_ctl promote -m fast will skip the checkpoint at end of recovery so that we can achieve very fast failover when the apply delay is low. Write new WAL record XLOG_END_OF_RECOVERY to allow us to switch timeline correctly for downstream log readers. If we skip synchronous end of recovery checkpoint we request a normal spread checkpoint so that the window of re-recovery is low. Simon Riggs and Kyotaro Horiguchi, with input from Fujii Masao. Review by Heikki Linnakangas
1 parent ee22c55 commit fd4ced5

File tree

5 files changed

+195
-32
lines changed

5 files changed

+195
-32
lines changed

‎src/backend/access/rmgrdesc/xlogdesc.c

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include"access/xlog_internal.h"
1919
#include"catalog/pg_control.h"
2020
#include"utils/guc.h"
21+
#include"utils/timestamp.h"
2122

2223
/*
2324
* GUC support
@@ -119,6 +120,15 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
119120
memcpy(&fpw,rec,sizeof(bool));
120121
appendStringInfo(buf,"full_page_writes: %s",fpw ?"true" :"false");
121122
}
123+
elseif (info==XLOG_END_OF_RECOVERY)
124+
{
125+
xl_end_of_recoveryxlrec;
126+
127+
memcpy(&xlrec,rec,sizeof(xl_end_of_recovery));
128+
appendStringInfo(buf,"end_of_recovery: tli %u; time %s",
129+
xlrec.ThisTimeLineID,
130+
timestamptz_to_str(xlrec.end_time));
131+
}
122132
else
123133
appendStringInfo(buf,"UNKNOWN");
124134
}

‎src/backend/access/transam/xlog.c

Lines changed: 163 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@
6666
#defineRECOVERY_COMMAND_FILE"recovery.conf"
6767
#defineRECOVERY_COMMAND_DONE"recovery.done"
6868
#definePROMOTE_SIGNAL_FILE "promote"
69+
#defineFAST_PROMOTE_SIGNAL_FILE "fast_promote"
6970

7071

7172
/* User-settable parameters */
@@ -210,6 +211,9 @@ bool StandbyMode = false;
210211
staticchar*PrimaryConnInfo=NULL;
211212
staticchar*TriggerFile=NULL;
212213

214+
/* whether request for fast promotion has been made yet */
215+
staticboolfast_promote= false;
216+
213217
/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
214218
staticTransactionIdrecoveryStopXid;
215219
staticTimestampTzrecoveryStopTime;
@@ -611,6 +615,7 @@ static void CheckRequiredParameterValues(void);
611615
staticvoidXLogReportParameters(void);
612616
staticvoidcheckTimeLineSwitch(XLogRecPtrlsn,TimeLineIDnewTLI);
613617
staticvoidLocalSetXLogInsertAllowed(void);
618+
staticvoidCreateEndOfRecoveryRecord(void);
614619
staticvoidCheckPointGuts(XLogRecPtrcheckPointRedo,intflags);
615620
staticvoidKeepLogSeg(XLogRecPtrrecptr,XLogSegNo*logSegNo);
616621

@@ -642,7 +647,7 @@ static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
642647
intemode,boolfetching_ckpt);
643648
staticvoidCheckRecoveryConsistency(void);
644649
staticXLogRecord*ReadCheckpointRecord(XLogReaderState*xlogreader,
645-
XLogRecPtrRecPtr,intwhichChkpt);
650+
XLogRecPtrRecPtr,intwhichChkpti,boolreport);
646651
staticboolrescanLatestTimeLine(void);
647652
staticvoidWriteControlFile(void);
648653
staticvoidReadControlFile(void);
@@ -4848,7 +4853,7 @@ StartupXLOG(void)
48484853
* When a backup_label file is present, we want to roll forward from
48494854
* the checkpoint it identifies, rather than using pg_control.
48504855
*/
4851-
record=ReadCheckpointRecord(xlogreader,checkPointLoc,0);
4856+
record=ReadCheckpointRecord(xlogreader,checkPointLoc,0, true);
48524857
if (record!=NULL)
48534858
{
48544859
memcpy(&checkPoint,XLogRecGetData(record),sizeof(CheckPoint));
@@ -4890,7 +4895,7 @@ StartupXLOG(void)
48904895
*/
48914896
checkPointLoc=ControlFile->checkPoint;
48924897
RedoStartLSN=ControlFile->checkPointCopy.redo;
4893-
record=ReadCheckpointRecord(xlogreader,checkPointLoc,1);
4898+
record=ReadCheckpointRecord(xlogreader,checkPointLoc,1, true);
48944899
if (record!=NULL)
48954900
{
48964901
ereport(DEBUG1,
@@ -4909,7 +4914,7 @@ StartupXLOG(void)
49094914
else
49104915
{
49114916
checkPointLoc=ControlFile->prevCheckPoint;
4912-
record=ReadCheckpointRecord(xlogreader,checkPointLoc,2);
4917+
record=ReadCheckpointRecord(xlogreader,checkPointLoc,2, true);
49134918
if (record!=NULL)
49144919
{
49154920
ereport(LOG,
@@ -5393,22 +5398,33 @@ StartupXLOG(void)
53935398
}
53945399

53955400
/*
5396-
* Before replaying this record, check if it is a shutdown
5397-
* checkpoint record that causes the current timeline to
5398-
*change. The checkpoint record is already considered to be
5399-
* part of the new timeline, so we update ThisTimeLineID
5400-
* before replaying it. That's important so that replayEndTLI,
5401-
* which is recorded as the minimum recovery point's TLI if
5401+
* Before replaying this record, check if this record
5402+
* causes the current timeline to change. The record is
5403+
*already considered to be part of the new timeline,
5404+
* so we update ThisTimeLineID before replaying it.
5405+
* That's important so that replayEndTLI, which is
5406+
* recorded as the minimum recovery point's TLI if
54025407
* recovery stops after this record, is set correctly.
54035408
*/
5404-
if (record->xl_rmid==RM_XLOG_ID&&
5405-
(record->xl_info& ~XLR_INFO_MASK)==XLOG_CHECKPOINT_SHUTDOWN)
5409+
if (record->xl_rmid==RM_XLOG_ID)
54065410
{
5407-
CheckPointcheckPoint;
5408-
TimeLineIDnewTLI;
5411+
TimeLineIDnewTLI=ThisTimeLineID;
5412+
uint8info=record->xl_info& ~XLR_INFO_MASK;
5413+
5414+
if (info==XLOG_CHECKPOINT_SHUTDOWN)
5415+
{
5416+
CheckPointcheckPoint;
5417+
5418+
memcpy(&checkPoint,XLogRecGetData(record),sizeof(CheckPoint));
5419+
newTLI=checkPoint.ThisTimeLineID;
5420+
}
5421+
elseif (info==XLOG_END_OF_RECOVERY)
5422+
{
5423+
xl_end_of_recoveryxlrec;
54095424

5410-
memcpy(&checkPoint,XLogRecGetData(record),sizeof(CheckPoint));
5411-
newTLI=checkPoint.ThisTimeLineID;
5425+
memcpy(&xlrec,XLogRecGetData(record),sizeof(xl_end_of_recovery));
5426+
newTLI=xlrec.ThisTimeLineID;
5427+
}
54125428

54135429
if (newTLI!=ThisTimeLineID)
54145430
{
@@ -5729,9 +5745,36 @@ StartupXLOG(void)
57295745
* allows some extra error checking in xlog_redo.
57305746
*/
57315747
if (bgwriterLaunched)
5732-
RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
5733-
CHECKPOINT_IMMEDIATE |
5734-
CHECKPOINT_WAIT);
5748+
{
5749+
boolcheckpoint_wait= true;
5750+
5751+
/*
5752+
* If we've been explicitly promoted with fast option,
5753+
* end of recovery without a checkpoint if possible.
5754+
*/
5755+
if (fast_promote)
5756+
{
5757+
checkPointLoc=ControlFile->prevCheckPoint;
5758+
record=ReadCheckpointRecord(xlogreader,checkPointLoc,2, false);
5759+
if (record!=NULL)
5760+
{
5761+
checkpoint_wait= false;
5762+
CreateEndOfRecoveryRecord();
5763+
}
5764+
}
5765+
5766+
/*
5767+
* In most cases we will wait for a full checkpoint to complete.
5768+
*
5769+
* If not, issue a normal, non-immediate checkpoint but don't wait.
5770+
*/
5771+
if (checkpoint_wait)
5772+
RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
5773+
CHECKPOINT_IMMEDIATE |
5774+
CHECKPOINT_WAIT);
5775+
else
5776+
RequestCheckpoint(0);/* No flags */
5777+
}
57355778
else
57365779
CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY |CHECKPOINT_IMMEDIATE);
57375780

@@ -6060,12 +6103,15 @@ LocalSetXLogInsertAllowed(void)
60606103
*/
60616104
staticXLogRecord*
60626105
ReadCheckpointRecord(XLogReaderState*xlogreader,XLogRecPtrRecPtr,
6063-
intwhichChkpt)
6106+
intwhichChkpt,boolreport)
60646107
{
60656108
XLogRecord*record;
60666109

60676110
if (!XRecOffIsValid(RecPtr))
60686111
{
6112+
if (!report)
6113+
returnNULL;
6114+
60696115
switch (whichChkpt)
60706116
{
60716117
case1:
@@ -6088,6 +6134,9 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
60886134

60896135
if (record==NULL)
60906136
{
6137+
if (!report)
6138+
returnNULL;
6139+
60916140
switch (whichChkpt)
60926141
{
60936142
case1:
@@ -6882,6 +6931,44 @@ CreateCheckPoint(int flags)
68826931
LWLockRelease(CheckpointLock);
68836932
}
68846933

6934+
/*
6935+
* Mark the end of recovery in WAL though without running a full checkpoint.
6936+
* We can expect that a restartpoint is likely to be in progress as we
6937+
* do this, though we are unwilling to wait for it to complete. So be
6938+
* careful to avoid taking the CheckpointLock anywhere here.
6939+
*
6940+
* CreateRestartPoint() allows for the case where recovery may end before
6941+
* the restartpoint completes so there is no concern of concurrent behaviour.
6942+
*/
6943+
void
6944+
CreateEndOfRecoveryRecord(void)
6945+
{
6946+
xl_end_of_recoveryxlrec;
6947+
XLogRecDatardata;
6948+
6949+
/* sanity check */
6950+
if (!RecoveryInProgress())
6951+
elog(ERROR,"can only be used to end recovery");
6952+
6953+
xlrec.end_time=time(NULL);
6954+
xlrec.ThisTimeLineID=ThisTimeLineID;
6955+
6956+
LocalSetXLogInsertAllowed();
6957+
6958+
START_CRIT_SECTION();
6959+
6960+
rdata.data= (char*)&xlrec;
6961+
rdata.len=sizeof(xl_end_of_recovery);
6962+
rdata.buffer=InvalidBuffer;
6963+
rdata.next=NULL;
6964+
6965+
(void)XLogInsert(RM_XLOG_ID,XLOG_END_OF_RECOVERY,&rdata);
6966+
6967+
END_CRIT_SECTION();
6968+
6969+
LocalXLogInsertAllowed=-1;/* return to "check" state */
6970+
}
6971+
68856972
/*
68866973
* Flush all data in shared memory to disk, and fsync
68876974
*
@@ -7613,6 +7700,27 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
76137700

76147701
RecoveryRestartPoint(&checkPoint);
76157702
}
7703+
elseif (info==XLOG_END_OF_RECOVERY)
7704+
{
7705+
xl_end_of_recoveryxlrec;
7706+
7707+
memcpy(&xlrec,XLogRecGetData(record),sizeof(xl_end_of_recovery));
7708+
7709+
/*
7710+
* For Hot Standby, we could treat this like a Shutdown Checkpoint,
7711+
* but this case is rarer and harder to test, so the benefit doesn't
7712+
* outweigh the potential extra cost of maintenance.
7713+
*/
7714+
7715+
/*
7716+
* We should've already switched to the new TLI before replaying this
7717+
* record.
7718+
*/
7719+
if (xlrec.ThisTimeLineID!=ThisTimeLineID)
7720+
ereport(PANIC,
7721+
(errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
7722+
xlrec.ThisTimeLineID,ThisTimeLineID)));
7723+
}
76167724
elseif (info==XLOG_NOOP)
76177725
{
76187726
/* nothing to do here */
@@ -9405,8 +9513,39 @@ CheckForStandbyTrigger(void)
94059513

94069514
if (IsPromoteTriggered())
94079515
{
9408-
ereport(LOG,
9516+
/*
9517+
* In 9.1 and 9.2 the postmaster unlinked the promote file
9518+
* inside the signal handler. We now leave the file in place
9519+
* and let the Startup process do the unlink. This allows
9520+
* Startup to know whether we're doing fast or normal
9521+
* promotion. Fast promotion takes precedence.
9522+
*/
9523+
if (stat(FAST_PROMOTE_SIGNAL_FILE,&stat_buf)==0)
9524+
{
9525+
unlink(FAST_PROMOTE_SIGNAL_FILE);
9526+
unlink(PROMOTE_SIGNAL_FILE);
9527+
fast_promote= true;
9528+
}
9529+
elseif (stat(PROMOTE_SIGNAL_FILE,&stat_buf)==0)
9530+
{
9531+
unlink(PROMOTE_SIGNAL_FILE);
9532+
fast_promote= false;
9533+
}
9534+
9535+
/*
9536+
* We only look for fast promote via the pg_ctl promote option.
9537+
* It would be possible to extend trigger file support for the
9538+
* fast promotion option but that wouldn't be backwards compatible
9539+
* anyway and we're looking to focus further work on the promote
9540+
* option as the right way to signal end of recovery.
9541+
*/
9542+
if (fast_promote)
9543+
ereport(LOG,
9544+
(errmsg("received fast promote request")));
9545+
else
9546+
ereport(LOG,
94099547
(errmsg("received promote request")));
9548+
94109549
ResetPromoteTriggered();
94119550
triggered= true;
94129551
return true;
@@ -9435,15 +9574,10 @@ CheckPromoteSignal(void)
94359574
{
94369575
structstatstat_buf;
94379576

9438-
if (stat(PROMOTE_SIGNAL_FILE,&stat_buf)==0)
9439-
{
9440-
/*
9441-
* Since we are in a signal handler, it's not safe to elog. We
9442-
* silently ignore any error from unlink.
9443-
*/
9444-
unlink(PROMOTE_SIGNAL_FILE);
9577+
if (stat(PROMOTE_SIGNAL_FILE,&stat_buf)==0||
9578+
stat(FAST_PROMOTE_SIGNAL_FILE,&stat_buf)==0)
94459579
return true;
9446-
}
9580+
94479581
return false;
94489582
}
94499583

‎src/bin/pg_ctl/pg_ctl.c

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1136,6 +1136,15 @@ do_promote(void)
11361136
exit(1);
11371137
}
11381138

1139+
/*
1140+
* Use two different kinds of promotion file so we can understand
1141+
* the difference between smart and fast promotion.
1142+
*/
1143+
if (shutdown_mode >=FAST_MODE)
1144+
snprintf(promote_file,MAXPGPATH,"%s/fast_promote",pg_data);
1145+
else
1146+
snprintf(promote_file,MAXPGPATH,"%s/promote",pg_data);
1147+
11391148
if ((prmfile=fopen(promote_file,"w"))==NULL)
11401149
{
11411150
write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
@@ -1799,7 +1808,7 @@ do_help(void)
17991808
" [-o \"OPTIONS\"]\n"),progname);
18001809
printf(_(" %s reload [-D DATADIR] [-s]\n"),progname);
18011810
printf(_(" %s status [-D DATADIR]\n"),progname);
1802-
printf(_(" %s promote [-D DATADIR] [-s]\n"),progname);
1811+
printf(_(" %s promote [-D DATADIR] [-s] [-m PROMOTION-MODE]\n"),progname);
18031812
printf(_(" %s kill SIGNALNAME PID\n"),progname);
18041813
#if defined(WIN32)|| defined(__CYGWIN__)
18051814
printf(_(" %s register [-N SERVICENAME] [-U USERNAME] [-P PASSWORD] [-D DATADIR]\n"
@@ -1828,14 +1837,18 @@ do_help(void)
18281837
printf(_(" -o OPTIONS command line options to pass to postgres\n"
18291838
" (PostgreSQL server executable) or initdb\n"));
18301839
printf(_(" -p PATH-TO-POSTGRES normally not necessary\n"));
1831-
printf(_("\nOptions for stoporrestart:\n"));
1840+
printf(_("\nOptions for stop, restartorpromote:\n"));
18321841
printf(_(" -m, --mode=MODE MODE can be \"smart\", \"fast\", or \"immediate\"\n"));
18331842

18341843
printf(_("\nShutdown modes are:\n"));
18351844
printf(_(" smart quit after all clients have disconnected\n"));
18361845
printf(_(" fast quit directly, with proper shutdown\n"));
18371846
printf(_(" immediate quit without complete shutdown; will lead to recovery on restart\n"));
18381847

1848+
printf(_("\nPromotion modes are:\n"));
1849+
printf(_(" smart promote after performing a checkpoint\n"));
1850+
printf(_(" fast promote quickly without waiting for checkpoint completion\n"));
1851+
18391852
printf(_("\nAllowed signal names for kill:\n"));
18401853
printf(" ABRT HUP INT QUIT TERM USR1 USR2\n");
18411854

@@ -2271,7 +2284,6 @@ main(int argc, char **argv)
22712284
snprintf(pid_file,MAXPGPATH, "%s/postmaster.pid",pg_data);
22722285
snprintf(backup_file,MAXPGPATH, "%s/backup_label",pg_data);
22732286
snprintf(recovery_file,MAXPGPATH, "%s/recovery.conf",pg_data);
2274-
snprintf(promote_file,MAXPGPATH, "%s/promote",pg_data);
22752287
}
22762288

22772289
switch (ctl_command)

‎src/include/access/xlog_internal.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,12 @@ typedef struct xl_restore_point
217217
charrp_name[MAXFNAMELEN];
218218
}xl_restore_point;
219219

220+
/* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */
221+
typedefstructxl_end_of_recovery
222+
{
223+
TimestampTzend_time;
224+
TimeLineIDThisTimeLineID;
225+
}xl_end_of_recovery;
220226

221227
/*
222228
* XLogRecord is defined in xlog.h, but we avoid #including that to keep

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp