Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit39b547f

Browse files
committed
Prevent freshly-started backend from ignoring SIGUSR1, per race condition
observed by Inoue. Also, don't call ProcRemove() from postmaster if wehave detected a backend crash --- too risky if shared memory is corrupted.It's not needed anyway, considering we are going to reinitialize sharedmemory and semaphores as soon as the last child is dead.
1 parente4d97cb commit39b547f

File tree

2 files changed

+74
-63
lines changed

2 files changed

+74
-63
lines changed

‎src/backend/postmaster/postmaster.c

Lines changed: 57 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*
1212
*
1313
* IDENTIFICATION
14-
* $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.200 2000/12/18 18:45:04 momjian Exp $
14+
* $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.201 2000/12/20 21:51:52 tgl Exp $
1515
*
1616
* NOTES
1717
*
@@ -180,7 +180,7 @@ static time_tcheckpointed = 0;
180180

181181
staticintShutdown=NoShutdown;
182182

183-
staticboolFatalError= false;
183+
staticboolFatalError= false;/* T if recovering from backend crash */
184184

185185
/*
186186
* State for assigning random salts and cancel keys.
@@ -649,7 +649,7 @@ PostmasterMain(int argc, char *argv[])
649649
pqsignal(SIGTERM,pmdie);/* wait for children and ShutdownDataBase */
650650
pqsignal(SIGALRM,SIG_IGN);/* ignored */
651651
pqsignal(SIGPIPE,SIG_IGN);/* ignored */
652-
pqsignal(SIGUSR1,SIG_IGN);/* ignored */
652+
pqsignal(SIGUSR1,pmdie);/*currentlyignored, but see note in pmdie */
653653
pqsignal(SIGUSR2,pmdie);/* send SIGUSR2, don't die */
654654
pqsignal(SIGCHLD,reaper);/* handle child termination */
655655
pqsignal(SIGTTIN,SIG_IGN);/* ignored */
@@ -1329,6 +1329,18 @@ pmdie(SIGNAL_ARGS)
13291329

13301330
switch (postgres_signal_arg)
13311331
{
1332+
caseSIGUSR1:
1333+
/*
1334+
* Currently the postmaster ignores SIGUSR1 (maybe it should
1335+
* do something useful instead?) But we must have some handler
1336+
* installed for SIGUSR1, not just set it to SIG_IGN. Else, a
1337+
* freshly spawned backend would likewise have it set to SIG_IGN,
1338+
* which would mean the backend would ignore any attempt to kill
1339+
* it before it had gotten as far as setting up its own handler.
1340+
*/
1341+
errno=save_errno;
1342+
return;
1343+
13321344
caseSIGUSR2:
13331345

13341346
/*
@@ -1511,7 +1523,7 @@ reaper(SIGNAL_ARGS)
15111523
ExitPostmaster(1);
15121524
}
15131525
StartupPID=0;
1514-
FatalError= false;
1526+
FatalError= false;/* done with recovery */
15151527
if (Shutdown>NoShutdown)
15161528
{
15171529
if (ShutdownPID>0)
@@ -1539,12 +1551,7 @@ reaper(SIGNAL_ARGS)
15391551
/*
15401552
* Wait for all children exit, then reset shmem and StartupDataBase.
15411553
*/
1542-
if (DLGetHead(BackendList))
1543-
{
1544-
errno=save_errno;
1545-
return;
1546-
}
1547-
if (StartupPID>0||ShutdownPID>0)
1554+
if (DLGetHead(BackendList)||StartupPID>0||ShutdownPID>0)
15481555
{
15491556
errno=save_errno;
15501557
return;
@@ -1595,21 +1602,18 @@ CleanupProc(int pid,
15951602
Dlelem*curr,
15961603
*next;
15971604
Backend*bp;
1598-
intsig;
15991605

16001606
if (DebugLvl)
1601-
{
16021607
fprintf(stderr,"%s: CleanupProc: pid %d exited with status %d\n",
16031608
progname,pid,exitstatus);
1604-
}
16051609

16061610
/*
16071611
* If a backend dies in an ugly way (i.e. exit status not 0) then we
16081612
* must signal all other backends to quickdie.If exit status is zero
16091613
* we assume everything is hunky dory and simply remove the backend
16101614
* from the active backend list.
16111615
*/
1612-
if (!exitstatus)
1616+
if (exitstatus==0)
16131617
{
16141618
curr=DLGetHead(BackendList);
16151619
while (curr)
@@ -1628,73 +1632,78 @@ CleanupProc(int pid,
16281632
if (pid==CheckPointPID)
16291633
{
16301634
CheckPointPID=0;
1631-
checkpointed=time(NULL);
1635+
if (!FatalError)
1636+
checkpointed=time(NULL);
16321637
}
16331638
else
1634-
ProcRemove(pid);
1639+
{
1640+
/* Why is this done here, and not by the backend itself? */
1641+
if (!FatalError)
1642+
ProcRemove(pid);
1643+
}
16351644

16361645
return;
16371646
}
16381647

16391648
if (!FatalError)
16401649
{
1650+
/* Make log entry unless we did so already */
16411651
tnow=time(NULL);
16421652
fprintf(stderr,"Server process (pid %d) exited with status %d at %s"
16431653
"Terminating any active server processes...\n",
16441654
pid,exitstatus,ctime(&tnow));
16451655
fflush(stderr);
16461656
}
1647-
FatalError= true;
1657+
16481658
curr=DLGetHead(BackendList);
16491659
while (curr)
16501660
{
16511661
next=DLGetSucc(curr);
16521662
bp= (Backend*)DLE_VAL(curr);
1653-
1654-
/*
1655-
* SIGUSR1 is the special signal that says exit without proc_exit
1656-
* and let the user know what's going on. ProcSemaphoreKill()
1657-
* cleans up the backends semaphore. If SendStop is set (-s on
1658-
* command line), then we send a SIGSTOP so that we can core dumps
1659-
* from all backends by hand.
1660-
*/
1661-
sig= (SendStop) ?SIGSTOP :SIGUSR1;
16621663
if (bp->pid!=pid)
16631664
{
1664-
if (DebugLvl)
1665-
fprintf(stderr,"%s: CleanupProc: sending %s to process %d\n",
1666-
progname,
1667-
(sig==SIGUSR1)
1668-
?"SIGUSR1" :"SIGSTOP",
1669-
bp->pid);
1670-
kill(bp->pid,sig);
1665+
/*
1666+
* This backend is still alive. Unless we did so already,
1667+
* tell it to commit hara-kiri.
1668+
*
1669+
* SIGUSR1 is the special signal that says exit without proc_exit
1670+
* and let the user know what's going on. But if SendStop is set
1671+
* (-s on command line), then we send SIGSTOP instead, so that we
1672+
* can get core dumps from all backends by hand.
1673+
*/
1674+
if (!FatalError)
1675+
{
1676+
if (DebugLvl)
1677+
fprintf(stderr,"%s: CleanupProc: sending %s to process %d\n",
1678+
progname,
1679+
(SendStop ?"SIGSTOP" :"SIGUSR1"),
1680+
bp->pid);
1681+
kill(bp->pid, (SendStop ?SIGSTOP :SIGUSR1));
1682+
}
16711683
}
16721684
else
16731685
{
1674-
16751686
/*
1676-
* I don't like that we call ProcRemove() here, assuming that
1677-
* shmem may be corrupted! But is there another way to free
1678-
* backend semaphores? Actually, I believe that we need not in
1679-
* per backend semaphore at all (we use them to wait on lock
1680-
* only, couldn't we just sigpause?), so probably we'll remove
1681-
* this call from here someday. -- vadim 04-10-1999
1687+
* Found entry for freshly-dead backend, so remove it.
1688+
*
1689+
* Don't call ProcRemove() here, since shmem may be corrupted!
1690+
* We are going to reinitialize shmem and semaphores anyway
1691+
* once all the children are dead, so no need for it.
16821692
*/
1683-
if (pid==CheckPointPID)
1684-
{
1685-
CheckPointPID=0;
1686-
checkpointed=0;
1687-
}
1688-
else
1689-
ProcRemove(pid);
1690-
16911693
DLRemove(curr);
16921694
free(bp);
16931695
DLFreeElem(curr);
16941696
}
16951697
curr=next;
16961698
}
16971699

1700+
if (pid==CheckPointPID)
1701+
{
1702+
CheckPointPID=0;
1703+
checkpointed=0;
1704+
}
1705+
1706+
FatalError= true;
16981707
}
16991708

17001709
/*

‎src/backend/tcop/postgres.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.197 2000/12/18 18:45:05 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.198 2000/12/20 21:51:52 tgl Exp $
1212
*
1313
* NOTES
1414
* this is the "main" module of the postgres backend and
@@ -1462,21 +1462,12 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha
14621462
Assert(DataDir);
14631463

14641464
/*
1465-
* 1. Set BlockSig and UnBlockSig masks. 2. Set up signal handlers. 3.
1466-
* Allow only SIGUSR1 signal (we never block it) during
1467-
* initialization.
1465+
* Set up signal handlers and masks.
14681466
*
1469-
* Note that postmaster already blocked ALL signals to make us happy.
1467+
* Note that postmaster blocked all signals before forking child process,
1468+
* so there is no race condition whereby we might receive a signal before
1469+
* we have set up the handler.
14701470
*/
1471-
pqinitmask();
1472-
1473-
#ifdefHAVE_SIGPROCMASK
1474-
sigdelset(&BlockSig,SIGUSR1);
1475-
#else
1476-
BlockSig &= ~(sigmask(SIGUSR1));
1477-
#endif
1478-
1479-
PG_SETMASK(&BlockSig);/* block everything except SIGUSR1 */
14801471

14811472
pqsignal(SIGHUP,SigHupHandler);/* set flag to read config file */
14821473
pqsignal(SIGINT,QueryCancelHandler);/* cancel current query */
@@ -1499,6 +1490,17 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha
14991490
pqsignal(SIGTTOU,SIG_DFL);
15001491
pqsignal(SIGCONT,SIG_DFL);
15011492

1493+
pqinitmask();
1494+
1495+
/* We allow SIGUSR1 (quickdie) at all times */
1496+
#ifdefHAVE_SIGPROCMASK
1497+
sigdelset(&BlockSig,SIGUSR1);
1498+
#else
1499+
BlockSig &= ~(sigmask(SIGUSR1));
1500+
#endif
1501+
1502+
PG_SETMASK(&BlockSig);/* block everything except SIGUSR1 */
1503+
15021504

15031505
if (IsUnderPostmaster)
15041506
{
@@ -1649,7 +1651,7 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha
16491651
if (!IsUnderPostmaster)
16501652
{
16511653
puts("\nPOSTGRES backend interactive interface ");
1652-
puts("$Revision: 1.197 $ $Date: 2000/12/18 18:45:05 $\n");
1654+
puts("$Revision: 1.198 $ $Date: 2000/12/20 21:51:52 $\n");
16531655
}
16541656

16551657
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp