Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7e784d1

Browse files
committed
Improve client error messages for immediate-stop situations.
Up to now, if the DBA issued "pg_ctl stop -m immediate", the message sent to clients was the same as for a crash-and-restart situation. This is confusing, not least because the message claims that the database will soon be up again, something we have no business predicting. Improve things so that we can generate distinct messages for the two cases (and also recognize an ad-hoc SIGQUIT, should somebody try that). To do that, add a field to pmsignal.c's shared memory data structure that the postmaster sets just before broadcasting SIGQUIT to its children. No interlocking seems to be necessary; the intervening signal-sending and signal-receipt should sufficiently serialize accesses to the field. Hence, this isn't any riskier than the existing usages of pmsignal.c. We might in future extend this idea to improve other postmaster-to-children signal scenarios, although none of them currently seem to be as badly overloaded as SIGQUIT. Discussion: https://postgr.es/m/559291.1608587013@sss.pgh.pa.us
1 parent 90fbf7c commit 7e784d1

File tree

4 files changed

+86
-16
lines changed

4 files changed

+86
-16
lines changed

‎src/backend/postmaster/postmaster.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ intReservedBackends;
218218
/* The socket(s) we're listening to. */
219219
#defineMAXLISTEN64
220220
staticpgsocketListenSocket[MAXLISTEN];
221+
221222
/*
222223
* These globals control the behavior of the postmaster in case some
223224
* backend dumps core. Normally, it kills all peers of the dead backend
@@ -2887,6 +2888,8 @@ pmdie(SIGNAL_ARGS)
28872888
sd_notify(0,"STOPPING=1");
28882889
#endif
28892890

2891+
/* tell children to shut down ASAP */
2892+
SetQuitSignalReason(PMQUIT_FOR_STOP);
28902893
TerminateChildren(SIGQUIT);
28912894
pmState=PM_WAIT_BACKENDS;
28922895

@@ -3464,6 +3467,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
34643467
LogChildExit(LOG,procname,pid,exitstatus);
34653468
ereport(LOG,
34663469
(errmsg("terminating any other active server processes")));
3470+
SetQuitSignalReason(PMQUIT_FOR_CRASH);
34673471
}
34683472

34693473
/* Process background workers. */

‎src/backend/storage/ipc/pmsignal.c

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*-------------------------------------------------------------------------
22
*
33
* pmsignal.c
4-
* routines for signaling the postmaster from its child processes
4+
* routines for signaling between the postmaster and its child processes
55
*
66
*
77
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -55,6 +55,10 @@
5555
* but carries the extra information that the child is a WAL sender.
5656
* WAL senders too start in ACTIVE state, but switch to WALSENDER once they
5757
* start streaming the WAL (and they never go back to ACTIVE after that).
58+
*
59+
* We also have a shared-memory field that is used for communication in
60+
* the opposite direction, from postmaster to children: it tells why the
61+
* postmaster has broadcasted SIGQUIT signals, if indeed it has done so.
5862
*/
5963

6064
#definePM_CHILD_UNUSED0/* these values must fit in sig_atomic_t */
@@ -65,8 +69,10 @@
6569
/* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
6670
structPMSignalData
6771
{
68-
/* per-reason flags */
72+
/* per-reason flags for signaling the postmaster */
6973
sig_atomic_tPMSignalFlags[NUM_PMSIGNALS];
74+
/* global flags for signals from postmaster to children */
75+
QuitSignalReasonsigquit_reason;/* why SIGQUIT was sent */
7076
/* per-child-process flags */
7177
intnum_child_flags;/* # of entries in PMChildFlags[] */
7278
intnext_child_flag;/* next slot to try to assign */
@@ -134,6 +140,7 @@ PMSignalShmemInit(void)
134140

135141
if (!found)
136142
{
143+
/* initialize all flags to zeroes */
137144
MemSet(unvolatize(PMSignalData*,PMSignalState),0,PMSignalShmemSize());
138145
PMSignalState->num_child_flags=MaxLivePostmasterChildren();
139146
}
@@ -171,6 +178,34 @@ CheckPostmasterSignal(PMSignalReason reason)
171178
return false;
172179
}
173180

181+
/*
182+
* SetQuitSignalReason - broadcast the reason for a system shutdown.
183+
* Should be called by postmaster before sending SIGQUIT to children.
184+
*
185+
* Note: in a crash-and-restart scenario, the "reason" field gets cleared
186+
* as a part of rebuilding shared memory; the postmaster need not do it
187+
* explicitly.
188+
*/
189+
void
190+
SetQuitSignalReason(QuitSignalReasonreason)
191+
{
192+
PMSignalState->sigquit_reason=reason;
193+
}
194+
195+
/*
196+
* GetQuitSignalReason - obtain the reason for a system shutdown.
197+
* Called by child processes when they receive SIGQUIT.
198+
* If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
199+
*/
200+
QuitSignalReason
201+
GetQuitSignalReason(void)
202+
{
203+
/* This is called in signal handlers, so be extra paranoid. */
204+
if (!IsUnderPostmaster||PMSignalState==NULL)
205+
returnPMQUIT_NOT_SENT;
206+
returnPMSignalState->sigquit_reason;
207+
}
208+
174209

175210
/*
176211
* AssignPostmasterChildSlot - select an unused slot for a new postmaster

‎src/backend/tcop/postgres.c

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include"rewrite/rewriteHandler.h"
6868
#include"storage/bufmgr.h"
6969
#include"storage/ipc.h"
70+
#include"storage/pmsignal.h"
7071
#include"storage/proc.h"
7172
#include"storage/procsignal.h"
7273
#include"storage/sinval.h"
@@ -2752,8 +2753,8 @@ drop_unnamed_stmt(void)
27522753
/*
27532754
* quickdie() occurs when signaled SIGQUIT by the postmaster.
27542755
*
2755-
* Some backend has bought the farm,
2756-
* so we need to stop what we're doing and exit.
2756+
* Either some backend has bought the farm, or we've been told to shut down
2757+
* "immediately"; so we need to stop what we're doing and exit.
27572758
*/
27582759
void
27592760
quickdie(SIGNAL_ARGS)
@@ -2788,18 +2789,36 @@ quickdie(SIGNAL_ARGS)
27882789
* wrong, so there's not much to lose. Assuming the postmaster is still
27892790
* running, it will SIGKILL us soon if we get stuck for some reason.
27902791
*
2791-
* Ideally this should be ereport(FATAL), but then we'd not get control
2792-
* back...
2792+
* Ideally these should be ereport(FATAL), but then we'd not get control
2793+
* back to force the correct type of process exit.
27932794
*/
2794-
ereport(WARNING,
2795-
(errcode(ERRCODE_CRASH_SHUTDOWN),
2796-
errmsg("terminating connection because of crash of another server process"),
2797-
errdetail("The postmaster has commanded this server process to roll back"
2798-
" the current transaction and exit, because another"
2799-
" server process exited abnormally and possibly corrupted"
2800-
" shared memory."),
2801-
errhint("In a moment you should be able to reconnect to the"
2802-
" database and repeat your command.")));
2795+
switch (GetQuitSignalReason())
2796+
{
2797+
casePMQUIT_NOT_SENT:
2798+
/* Hmm, SIGQUIT arrived out of the blue */
2799+
ereport(WARNING,
2800+
(errcode(ERRCODE_ADMIN_SHUTDOWN),
2801+
errmsg("terminating connection because of unexpected SIGQUIT signal")));
2802+
break;
2803+
casePMQUIT_FOR_CRASH:
2804+
/* A crash-and-restart cycle is in progress */
2805+
ereport(WARNING,
2806+
(errcode(ERRCODE_CRASH_SHUTDOWN),
2807+
errmsg("terminating connection because of crash of another server process"),
2808+
errdetail("The postmaster has commanded this server process to roll back"
2809+
" the current transaction and exit, because another"
2810+
" server process exited abnormally and possibly corrupted"
2811+
" shared memory."),
2812+
errhint("In a moment you should be able to reconnect to the"
2813+
" database and repeat your command.")));
2814+
break;
2815+
casePMQUIT_FOR_STOP:
2816+
/* Immediate-mode stop */
2817+
ereport(WARNING,
2818+
(errcode(ERRCODE_ADMIN_SHUTDOWN),
2819+
errmsg("terminating connection due to immediate shutdown command")));
2820+
break;
2821+
}
28032822

28042823
/*
28052824
* We DO NOT want to run proc_exit() or atexit() callbacks -- we're here

‎src/include/storage/pmsignal.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*-------------------------------------------------------------------------
22
*
33
* pmsignal.h
4-
* routines for signaling the postmaster from its child processes
4+
* routines for signaling between the postmaster and its child processes
55
*
66
*
77
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -45,6 +45,16 @@ typedef enum
4545
NUM_PMSIGNALS/* Must be last value of enum! */
4646
}PMSignalReason;
4747

48+
/*
49+
* Reasons why the postmaster would send SIGQUIT to its children.
50+
*/
51+
typedefenum
52+
{
53+
PMQUIT_NOT_SENT=0,/* postmaster hasn't sent SIGQUIT */
54+
PMQUIT_FOR_CRASH,/* some other backend bought the farm */
55+
PMQUIT_FOR_STOP/* immediate stop was commanded */
56+
}QuitSignalReason;
57+
4858
/* PMSignalData is an opaque struct, details known only within pmsignal.c */
4959
typedefstructPMSignalDataPMSignalData;
5060

@@ -55,6 +65,8 @@ extern Size PMSignalShmemSize(void);
5565
externvoidPMSignalShmemInit(void);
5666
externvoidSendPostmasterSignal(PMSignalReasonreason);
5767
externboolCheckPostmasterSignal(PMSignalReasonreason);
68+
externvoidSetQuitSignalReason(QuitSignalReasonreason);
69+
externQuitSignalReasonGetQuitSignalReason(void);
5870
externintAssignPostmasterChildSlot(void);
5971
externboolReleasePostmasterChildSlot(intslot);
6072
externboolIsPostmasterChildWalSender(intslot);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp