Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 87a6690

Browse files
committed
Change shutdown sequence to terminate checkpointer last
The main motivation for this change is to have a process that can serialize stats after all other processes have terminated. Serializing stats already happens in checkpointer, even though walsenders can be active longer. The only reason the current shutdown sequence does not actively cause problems is that walsender currently does not generate any stats. However, there is an upcoming patch changing that. Another need for this change originates in the AIO patchset, where IO workers (which, in some edge cases, can emit stats of their own) need to run while the shutdown checkpoint is being written. This commit changes the shutdown sequence so checkpointer is signalled (via SIGINT) to trigger writing the shutdown checkpoint without also causing checkpointer to exit. Once checkpointer wrote the shutdown checkpoint it notifies postmaster via PMSIGNAL_XLOG_IS_SHUTDOWN and waits for the termination signal (SIGUSR2, as before). Checkpointer now is terminated after all children, other than dead-end children and logger, have been terminated, tracked using the new PM_WAIT_CHECKPOINTER PMState. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com> Discussion: https://postgr.es/m/kgng5nrvnlv335evmsuvpnh354rw7qyazl73kdysev2cr2v5zu@m3cfzxicm5kp
1 parent 04ace17 commit 87a6690

File tree

4 files changed

+201
-69
lines changed

4 files changed

+201
-69
lines changed

‎src/backend/postmaster/checkpointer.c

Lines changed: 95 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,13 @@
1010
* fill WAL segments; the checkpointer itself doesn't watch for the
1111
* condition.)
1212
*
13-
* Normal termination is by SIGUSR2, which instructs the checkpointer to
14-
* execute a shutdown checkpoint and then exit(0). (All backends must be
15-
* stopped before SIGUSR2 is issued!) Emergency termination is by SIGQUIT;
16-
* like any backend, the checkpointer will simply abort and exit on SIGQUIT.
13+
* The normal termination sequence is that checkpointer is instructed to
14+
* execute the shutdown checkpoint by SIGINT. After that checkpointer waits
15+
* to be terminated via SIGUSR2, which instructs the checkpointer to exit(0).
16+
* All backends must be stopped before SIGINT or SIGUSR2 is issued!
17+
*
18+
* Emergency termination is by SIGQUIT; like any backend, the checkpointer
19+
* will simply abort and exit on SIGQUIT.
1720
*
1821
* If the checkpointer exits unexpectedly, the postmaster treats that the same
1922
* as a backend crash: shared memory may be corrupted, so remaining backends
@@ -51,6 +54,7 @@
5154
#include"storage/fd.h"
5255
#include"storage/ipc.h"
5356
#include"storage/lwlock.h"
57+
#include"storage/pmsignal.h"
5458
#include"storage/proc.h"
5559
#include"storage/procsignal.h"
5660
#include"storage/shmem.h"
@@ -141,6 +145,7 @@ doubleCheckPointCompletionTarget = 0.9;
141145
* Private state
142146
*/
143147
staticboolckpt_active= false;
148+
staticvolatilesig_atomic_tShutdownXLOGPending= false;
144149

145150
/* these values are valid when ckpt_active is true: */
146151
staticpg_time_tckpt_start_time;
@@ -159,6 +164,9 @@ static bool ImmediateCheckpointRequested(void);
159164
staticboolCompactCheckpointerRequestQueue(void);
160165
staticvoidUpdateSharedMemoryConfig(void);
161166

167+
/* Signal handlers */
168+
staticvoidReqShutdownXLOG(SIGNAL_ARGS);
169+
162170

163171
/*
164172
* Main entry point for checkpointer process
@@ -188,7 +196,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
188196
* tell us it's okay to shut down (via SIGUSR2).
189197
*/
190198
pqsignal(SIGHUP,SignalHandlerForConfigReload);
191-
pqsignal(SIGINT,SIG_IGN);
199+
pqsignal(SIGINT,ReqShutdownXLOG);
192200
pqsignal(SIGTERM,SIG_IGN);/* ignore SIGTERM */
193201
/* SIGQUIT handler was already set up by InitPostmasterChild */
194202
pqsignal(SIGALRM,SIG_IGN);
@@ -211,8 +219,11 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
211219
* process during a normal shutdown, and since checkpointer is shut down
212220
* very late...
213221
*
214-
* Walsenders are shut down after the checkpointer, but currently don't
215-
* report stats. If that changes, we need a more complicated solution.
222+
* While e.g. walsenders are active after the shutdown checkpoint has been
223+
* written (and thus could produce more stats), checkpointer stays around
224+
* after the shutdown checkpoint has been written. postmaster will only
225+
* signal checkpointer to exit after all processes that could emit stats
226+
* have been shut down.
216227
*/
217228
before_shmem_exit(pgstat_before_server_shutdown,0);
218229

@@ -327,7 +338,8 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
327338
ProcGlobal->checkpointerProc=MyProcNumber;
328339

329340
/*
330-
* Loop forever
341+
* Loop until we've been asked to write the shutdown checkpoint or
342+
* terminate.
331343
*/
332344
for (;;)
333345
{
@@ -346,7 +358,10 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
346358
* Process any requests or signals received recently.
347359
*/
348360
AbsorbSyncRequests();
361+
349362
HandleCheckpointerInterrupts();
363+
if (ShutdownXLOGPending||ShutdownRequestPending)
364+
break;
350365

351366
/*
352367
* Detect a pending checkpoint request by checking whether the flags
@@ -517,8 +532,13 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
517532

518533
ckpt_active= false;
519534

520-
/* We may have received an interrupt during the checkpoint. */
535+
/*
536+
* We may have received an interrupt during the checkpoint and the
537+
* latch might have been reset (e.g. in CheckpointWriteDelay).
538+
*/
521539
HandleCheckpointerInterrupts();
540+
if (ShutdownXLOGPending||ShutdownRequestPending)
541+
break;
522542
}
523543

524544
/* Check for archive_timeout and switch xlog files if necessary. */
@@ -557,6 +577,57 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
557577
cur_timeout*1000L/* convert to ms */ ,
558578
WAIT_EVENT_CHECKPOINTER_MAIN);
559579
}
580+
581+
/*
582+
* From here on, elog(ERROR) should end with exit(1), not send control
583+
* back to the sigsetjmp block above.
584+
*/
585+
ExitOnAnyError= true;
586+
587+
if (ShutdownXLOGPending)
588+
{
589+
/*
590+
* Close down the database.
591+
*
592+
* Since ShutdownXLOG() creates restartpoint or checkpoint, and
593+
* updates the statistics, increment the checkpoint request and flush
594+
* out pending statistic.
595+
*/
596+
PendingCheckpointerStats.num_requested++;
597+
ShutdownXLOG(0,0);
598+
pgstat_report_checkpointer();
599+
pgstat_report_wal(true);
600+
601+
/*
602+
* Tell postmaster that we're done.
603+
*/
604+
SendPostmasterSignal(PMSIGNAL_XLOG_IS_SHUTDOWN);
605+
ShutdownXLOGPending= false;
606+
}
607+
608+
/*
609+
* Wait until we're asked to shut down. By separating the writing of the
610+
* shutdown checkpoint from checkpointer exiting, checkpointer can perform
611+
* some should-be-as-late-as-possible work like writing out stats.
612+
*/
613+
for (;;)
614+
{
615+
/* Clear any already-pending wakeups */
616+
ResetLatch(MyLatch);
617+
618+
HandleCheckpointerInterrupts();
619+
620+
if (ShutdownRequestPending)
621+
break;
622+
623+
(void)WaitLatch(MyLatch,
624+
WL_LATCH_SET |WL_EXIT_ON_PM_DEATH,
625+
0,
626+
WAIT_EVENT_CHECKPOINTER_SHUTDOWN);
627+
}
628+
629+
/* Normal exit from the checkpointer is here */
630+
proc_exit(0);/* done */
560631
}
561632

562633
/*
@@ -586,29 +657,6 @@ HandleCheckpointerInterrupts(void)
586657
*/
587658
UpdateSharedMemoryConfig();
588659
}
589-
if (ShutdownRequestPending)
590-
{
591-
/*
592-
* From here on, elog(ERROR) should end with exit(1), not send control
593-
* back to the sigsetjmp block above
594-
*/
595-
ExitOnAnyError= true;
596-
597-
/*
598-
* Close down the database.
599-
*
600-
* Since ShutdownXLOG() creates restartpoint or checkpoint, and
601-
* updates the statistics, increment the checkpoint request and flush
602-
* out pending statistic.
603-
*/
604-
PendingCheckpointerStats.num_requested++;
605-
ShutdownXLOG(0,0);
606-
pgstat_report_checkpointer();
607-
pgstat_report_wal(true);
608-
609-
/* Normal exit from the checkpointer is here */
610-
proc_exit(0);/* done */
611-
}
612660

613661
/* Perform logging of memory contexts of this process */
614662
if (LogMemoryContextPending)
@@ -729,6 +777,7 @@ CheckpointWriteDelay(int flags, double progress)
729777
* in which case we just try to catch up as quickly as possible.
730778
*/
731779
if (!(flags&CHECKPOINT_IMMEDIATE)&&
780+
!ShutdownXLOGPending&&
732781
!ShutdownRequestPending&&
733782
!ImmediateCheckpointRequested()&&
734783
IsCheckpointOnSchedule(progress))
@@ -857,6 +906,20 @@ IsCheckpointOnSchedule(double progress)
857906
}
858907

859908

909+
/* --------------------------------
910+
*signal handler routines
911+
* --------------------------------
912+
*/
913+
914+
/* SIGINT: set flag to trigger writing of shutdown checkpoint */
915+
staticvoid
916+
ReqShutdownXLOG(SIGNAL_ARGS)
917+
{
918+
ShutdownXLOGPending= true;
919+
SetLatch(MyLatch);
920+
}
921+
922+
860923
/* --------------------------------
861924
*communication with backends
862925
* --------------------------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp