Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 89fd72c

Browse files
committed
Introduce a pipe between postmaster and each backend, which can be used to
detect postmaster death. Postmaster keeps the write-end of the pipe open, so when it dies, children get EOF in the read-end. That can conveniently be waited for in select(), which allows eliminating some of the polling loops that check for postmaster death. This patch doesn't yet change all the loops to use the new mechanism, expect a follow-on patch to do that. This changes the interface to WaitLatch, so that it takes as argument a bitmask of events that it waits for. Possible events are latch set, timeout, postmaster death, and socket becoming readable or writeable. The pipe method behaves slightly differently from the kill() method previously used in PostmasterIsAlive() in the case that postmaster has died, but its parent has not yet read its exit code with waitpid(). The pipe returns EOF as soon as the process dies, but kill() continues to return true until waitpid() has been called (IOW while the process is a zombie). Because of that, change PostmasterIsAlive() to use the pipe too, otherwise WaitLatch() would return immediately with WL_POSTMASTER_DEATH, while PostmasterIsAlive() would claim it's still alive. That could easily lead to busy-waiting while postmaster is in zombie state. Peter Geoghegan with further changes by me, reviewed by Fujii Masao and Florian Pflug.
1 parent 9598afa commit 89fd72c

File tree

16 files changed

+322
-141
lines changed

16 files changed

+322
-141
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9938,7 +9938,7 @@ HandleStartupProcInterrupts(void)
99389938
* Emergency bailout if postmaster has died. This is to avoid the
99399939
* necessity for manual cleanup of all postmaster children.
99409940
*/
9941-
if (IsUnderPostmaster&& !PostmasterIsAlive(true))
9941+
if (IsUnderPostmaster&& !PostmasterIsAlive())
99429942
exit(1);
99439943
}
99449944

@@ -10165,7 +10165,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1016510165
/*
1016610166
* Wait for more WAL to arrive, or timeout to be reached
1016710167
*/
10168-
WaitLatch(&XLogCtl->recoveryWakeupLatch,5000000L);
10168+
WaitLatch(&XLogCtl->recoveryWakeupLatch,WL_LATCH_SET |WL_TIMEOUT,5000000L);
1016910169
ResetLatch(&XLogCtl->recoveryWakeupLatch);
1017010170
}
1017110171
else

‎src/backend/port/unix_latch.c

Lines changed: 70 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
#endif
9494

9595
#include"miscadmin.h"
96+
#include"postmaster/postmaster.h"
9697
#include"storage/latch.h"
9798
#include"storage/shmem.h"
9899

@@ -176,34 +177,44 @@ DisownLatch(volatile Latch *latch)
176177
}
177178

178179
/*
179-
* Wait for given latch to be set or until timeout is exceeded.
180-
* If the latch is already set, the function returns immediately.
180+
* Wait for a given latch to be set, postmaster death, or until timeout is
181+
* exceeded. 'wakeEvents' is a bitmask that specifies which of those events
182+
* to wait for. If the latch is already set (and WL_LATCH_SET is given), the
183+
* function returns immediately.
181184
*
182-
* The 'timeout' is given in microseconds, and -1 means wait forever.
183-
* On some platforms, signals cause the timeout to be restarted, so beware
184-
* that the function can sleep for several times longer than the specified
185-
* timeout.
185+
* The 'timeout' is given in microseconds. It must be >= 0 if WL_TIMEOUT
186+
* event is given, otherwise it is ignored. On some platforms, signals cause
187+
* the timeout to be restarted, so beware that the function can sleep for
188+
* several times longer than the specified timeout.
186189
*
187190
* The latch must be owned by the current process, ie. it must be a
188191
* backend-local latch initialized with InitLatch, or a shared latch
189192
* associated with the current process by calling OwnLatch.
190193
*
191-
* Returns 'true' if the latch was set, or 'false' if timeout was reached.
194+
* Returns bit field indicating which condition(s) caused the wake-up. Note
195+
* that if multiple wake-up conditions are true, there is no guarantee that
196+
* we return all of them in one call, but we will return at least one. Also,
197+
* according to the select(2) man page on Linux, select(2) may spuriously
198+
* return and report a file descriptor as readable, when it's not. We use
199+
* select(2), so WaitLatch can also spuriously claim that a socket is
200+
* readable, or postmaster has died, even when none of the wake conditions
201+
* have been satisfied. That should be rare in practice, but the caller
202+
* should not use the return value for anything critical, re-checking the
203+
* situation with PostmasterIsAlive() or read() on a socket if necessary.
192204
*/
193-
bool
194-
WaitLatch(volatileLatch*latch,longtimeout)
205+
int
206+
WaitLatch(volatileLatch*latch,intwakeEvents,longtimeout)
195207
{
196-
returnWaitLatchOrSocket(latch,PGINVALID_SOCKET, false, false,timeout)>0;
208+
returnWaitLatchOrSocket(latch,wakeEvents,PGINVALID_SOCKET,timeout);
197209
}
198210

199211
/*
200-
* Like WaitLatch, but will also return when there's data available in
201-
* 'sock' for reading or writing. Returns 0 if timeout was reached,
202-
* 1 if the latch was set, 2 if the socket became readable or writable.
212+
* Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
213+
* conditions.
203214
*/
204215
int
205-
WaitLatchOrSocket(volatileLatch*latch,pgsocketsock,boolforRead,
206-
boolforWrite,longtimeout)
216+
WaitLatchOrSocket(volatileLatch*latch,intwakeEvents,pgsocketsock,
217+
longtimeout)
207218
{
208219
structtimevaltv,
209220
*tvp=NULL;
@@ -212,19 +223,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
212223
intrc;
213224
intresult=0;
214225

215-
if (latch->owner_pid!=MyProcPid)
226+
/* Ignore WL_SOCKET_* events if no valid socket is given */
227+
if (sock==PGINVALID_SOCKET)
228+
wakeEvents &= ~(WL_SOCKET_READABLE |WL_SOCKET_WRITEABLE);
229+
230+
Assert(wakeEvents!=0);/* must have at least one wake event */
231+
232+
if ((wakeEvents&WL_LATCH_SET)&&latch->owner_pid!=MyProcPid)
216233
elog(ERROR,"cannot wait on a latch owned by another process");
217234

218235
/* Initialize timeout */
219-
if (timeout >=0)
236+
if (wakeEvents&WL_TIMEOUT)
220237
{
238+
Assert(timeout >=0);
221239
tv.tv_sec=timeout /1000000L;
222240
tv.tv_usec=timeout %1000000L;
223241
tvp=&tv;
224242
}
225243

226244
waiting= true;
227-
for (;;)
245+
do
228246
{
229247
inthifd;
230248

@@ -235,31 +253,46 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
235253
* do that), and the select() will return immediately.
236254
*/
237255
drainSelfPipe();
238-
if (latch->is_set)
256+
if ((wakeEvents&WL_LATCH_SET)&&latch->is_set)
239257
{
240-
result=1;
258+
result |=WL_LATCH_SET;
259+
/*
260+
* Leave loop immediately, avoid blocking again. We don't attempt
261+
* to report any other events that might also be satisfied.
262+
*/
241263
break;
242264
}
243265

244266
FD_ZERO(&input_mask);
245267
FD_SET(selfpipe_readfd,&input_mask);
246268
hifd=selfpipe_readfd;
247-
if (sock!=PGINVALID_SOCKET&&forRead)
269+
270+
if (wakeEvents&WL_POSTMASTER_DEATH)
271+
{
272+
FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH],&input_mask);
273+
if (postmaster_alive_fds[POSTMASTER_FD_WATCH]>hifd)
274+
hifd=postmaster_alive_fds[POSTMASTER_FD_WATCH];
275+
}
276+
277+
if (wakeEvents&WL_SOCKET_READABLE)
248278
{
249279
FD_SET(sock,&input_mask);
250280
if (sock>hifd)
251281
hifd=sock;
252282
}
253283

254284
FD_ZERO(&output_mask);
255-
if (sock!=PGINVALID_SOCKET&&forWrite)
285+
if (wakeEvents&WL_SOCKET_WRITEABLE)
256286
{
257287
FD_SET(sock,&output_mask);
258288
if (sock>hifd)
259289
hifd=sock;
260290
}
261291

292+
/* Sleep */
262293
rc=select(hifd+1,&input_mask,&output_mask,NULL,tvp);
294+
295+
/* Check return code */
263296
if (rc<0)
264297
{
265298
if (errno==EINTR)
@@ -268,20 +301,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
268301
(errcode_for_socket_access(),
269302
errmsg("select() failed: %m")));
270303
}
271-
if (rc==0)
304+
if (rc==0&& (wakeEvents&WL_TIMEOUT))
272305
{
273306
/* timeout exceeded */
274-
result=0;
275-
break;
307+
result |=WL_TIMEOUT;
276308
}
277-
if (sock!=PGINVALID_SOCKET&&
278-
((forRead&&FD_ISSET(sock,&input_mask))||
279-
(forWrite&&FD_ISSET(sock,&output_mask))))
309+
if ((wakeEvents&WL_SOCKET_READABLE)&&FD_ISSET(sock,&input_mask))
280310
{
281-
result=2;
282-
break;/* data available in socket */
311+
/* data available in socket */
312+
result |=WL_SOCKET_READABLE;
283313
}
284-
}
314+
if ((wakeEvents&WL_SOCKET_WRITEABLE)&&FD_ISSET(sock,&output_mask))
315+
{
316+
result |=WL_SOCKET_WRITEABLE;
317+
}
318+
if ((wakeEvents&WL_POSTMASTER_DEATH)&&
319+
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH],&input_mask))
320+
{
321+
result |=WL_POSTMASTER_DEATH;
322+
}
323+
}while(result==0);
285324
waiting= false;
286325

287326
returnresult;

‎src/backend/port/win32_latch.c

Lines changed: 71 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include<unistd.h>
2424

2525
#include"miscadmin.h"
26+
#include"postmaster/postmaster.h"
2627
#include"replication/walsender.h"
2728
#include"storage/latch.h"
2829
#include"storage/shmem.h"
@@ -81,43 +82,67 @@ DisownLatch(volatile Latch *latch)
8182
latch->owner_pid=0;
8283
}
8384

84-
bool
85-
WaitLatch(volatileLatch*latch,longtimeout)
85+
int
86+
WaitLatch(volatileLatch*latch,intwakeEvents,longtimeout)
8687
{
87-
returnWaitLatchOrSocket(latch,PGINVALID_SOCKET, false, false,timeout)>0;
88+
returnWaitLatchOrSocket(latch,wakeEvents,PGINVALID_SOCKET,timeout);
8889
}
8990

9091
int
91-
WaitLatchOrSocket(volatileLatch*latch,SOCKETsock,boolforRead,
92-
boolforWrite,longtimeout)
92+
WaitLatchOrSocket(volatileLatch*latch,intwakeEvents,SOCKETsock,
93+
longtimeout)
9394
{
9495
DWORDrc;
95-
HANDLEevents[3];
96+
HANDLEevents[4];
9697
HANDLElatchevent;
97-
HANDLEsockevent=WSA_INVALID_EVENT;/* silence compiler */
98+
HANDLEsockevent=WSA_INVALID_EVENT;
9899
intnumevents;
99100
intresult=0;
101+
intpmdeath_eventno;
102+
longtimeout_ms;
103+
104+
Assert(wakeEvents!=0);
105+
106+
/* Ignore WL_SOCKET_* events if no valid socket is given */
107+
if (sock==PGINVALID_SOCKET)
108+
wakeEvents &= ~(WL_SOCKET_READABLE |WL_SOCKET_WRITEABLE);
109+
110+
/* Convert timeout to milliseconds for WaitForMultipleObjects() */
111+
if (wakeEvents&WL_TIMEOUT)
112+
{
113+
Assert(timeout >=0);
114+
timeout_ms=timeout /1000;
115+
}
116+
else
117+
timeout_ms=INFINITE;
100118

119+
/* Construct an array of event handles for WaitForMultipleObjects() */
101120
latchevent=latch->event;
102121

103122
events[0]=latchevent;
104123
events[1]=pgwin32_signal_event;
105124
numevents=2;
106-
if (sock!=PGINVALID_SOCKET&& (forRead||forWrite))
125+
if (((wakeEvents&WL_SOCKET_READABLE)||
126+
(wakeEvents&WL_SOCKET_WRITEABLE)))
107127
{
108128
intflags=0;
109129

110-
if (forRead)
130+
if (wakeEvents&WL_SOCKET_READABLE)
111131
flags |=FD_READ;
112-
if (forWrite)
132+
if (wakeEvents&WL_SOCKET_WRITEABLE)
113133
flags |=FD_WRITE;
114134

115135
sockevent=WSACreateEvent();
116136
WSAEventSelect(sock,sockevent,flags);
117137
events[numevents++]=sockevent;
118138
}
139+
if (wakeEvents&WL_POSTMASTER_DEATH)
140+
{
141+
pmdeath_eventno=numevents;
142+
events[numevents++]=PostmasterHandle;
143+
}
119144

120-
for (;;)
145+
do
121146
{
122147
/*
123148
* Reset the event, and check if the latch is set already. If someone
@@ -127,45 +152,64 @@ WaitLatchOrSocket(volatile Latch *latch, SOCKET sock, bool forRead,
127152
*/
128153
if (!ResetEvent(latchevent))
129154
elog(ERROR,"ResetEvent failed: error code %d", (int)GetLastError());
130-
if (latch->is_set)
155+
if (latch->is_set&& (wakeEvents&WL_LATCH_SET))
131156
{
132-
result=1;
157+
result |=WL_LATCH_SET;
158+
/*
159+
* Leave loop immediately, avoid blocking again. We don't attempt
160+
* to report any other events that might also be satisfied.
161+
*/
133162
break;
134163
}
135164

136-
rc=WaitForMultipleObjects(numevents,events, FALSE,
137-
(timeout >=0) ? (timeout /1000) :INFINITE);
165+
rc=WaitForMultipleObjects(numevents,events, FALSE,timeout_ms);
166+
138167
if (rc==WAIT_FAILED)
139168
elog(ERROR,"WaitForMultipleObjects() failed: error code %d", (int)GetLastError());
169+
170+
/* Participate in Windows signal emulation */
171+
elseif (rc==WAIT_OBJECT_0+1)
172+
pgwin32_dispatch_queued_signals();
173+
174+
elseif ((wakeEvents&WL_POSTMASTER_DEATH)&&
175+
rc==WAIT_OBJECT_0+pmdeath_eventno)
176+
{
177+
/* Postmaster died */
178+
result |=WL_POSTMASTER_DEATH;
179+
}
140180
elseif (rc==WAIT_TIMEOUT)
141181
{
142-
result=0;
143-
break;
182+
result |=WL_TIMEOUT;
144183
}
145-
elseif (rc==WAIT_OBJECT_0+1)
146-
pgwin32_dispatch_queued_signals();
147-
elseif (rc==WAIT_OBJECT_0+2)
184+
elseif ((wakeEvents& (WL_SOCKET_READABLE |WL_SOCKET_WRITEABLE))!=0&&
185+
rc==WAIT_OBJECT_0+2)/* socket is at event slot 2 */
148186
{
149187
WSANETWORKEVENTSresEvents;
150188

151-
Assert(sock!=PGINVALID_SOCKET);
152-
153189
ZeroMemory(&resEvents,sizeof(resEvents));
154190
if (WSAEnumNetworkEvents(sock,sockevent,&resEvents)==SOCKET_ERROR)
155191
ereport(FATAL,
156192
(errmsg_internal("failed to enumerate network events: %i", (int)GetLastError())));
157193

158-
if ((forRead&&resEvents.lNetworkEvents&FD_READ)||
159-
(forWrite&&resEvents.lNetworkEvents&FD_WRITE))
160-
result=2;
161-
break;
194+
if ((wakeEvents&WL_SOCKET_READABLE)&&
195+
(resEvents.lNetworkEvents&FD_READ))
196+
{
197+
result |=WL_SOCKET_READABLE;
198+
}
199+
if ((wakeEvents&WL_SOCKET_WRITEABLE)&&
200+
(resEvents.lNetworkEvents&FD_WRITE))
201+
{
202+
result |=WL_SOCKET_WRITEABLE;
203+
}
162204
}
205+
/* Otherwise it must be the latch event */
163206
elseif (rc!=WAIT_OBJECT_0)
164207
elog(ERROR,"unexpected return code from WaitForMultipleObjects(): %d", (int)rc);
165208
}
209+
while(result==0);
166210

167211
/* Clean up the handle we created for the socket */
168-
if (sock!=PGINVALID_SOCKET&& (forRead||forWrite))
212+
if (sockevent!=WSA_INVALID_EVENT)
169213
{
170214
WSAEventSelect(sock,sockevent,0);
171215
WSACloseEvent(sockevent);

‎src/backend/postmaster/autovacuum.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ AutoVacLauncherMain(int argc, char *argv[])
556556
* Emergency bailout if postmaster has died. This is to avoid the
557557
* necessity for manual cleanup of all postmaster children.
558558
*/
559-
if (!PostmasterIsAlive(true))
559+
if (!PostmasterIsAlive())
560560
proc_exit(1);
561561

562562
launcher_determine_sleep((AutoVacuumShmem->av_freeWorkers!=NULL),
@@ -593,7 +593,7 @@ AutoVacLauncherMain(int argc, char *argv[])
593593
* Emergency bailout if postmaster has died. This is to avoid the
594594
* necessity for manual cleanup of all postmaster children.
595595
*/
596-
if (!PostmasterIsAlive(true))
596+
if (!PostmasterIsAlive())
597597
proc_exit(1);
598598

599599
if (got_SIGTERM||got_SIGHUP||got_SIGUSR2)

‎src/backend/postmaster/bgwriter.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ BackgroundWriterMain(void)
381381
* Emergency bailout if postmaster has died. This is to avoid the
382382
* necessity for manual cleanup of all postmaster children.
383383
*/
384-
if (!PostmasterIsAlive(true))
384+
if (!PostmasterIsAlive())
385385
exit(1);
386386

387387
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp