2121 *
2222 * If the bgwriter exits unexpectedly, the postmaster treats that the same
2323 * as a backend crash: shared memory may be corrupted, so remaining backends
24- * should be killed by SIGQUIT and then a recovery cycle started. (Even if
25- * shared memory isn't corrupted, we have lost information about which
26- * files need to be fsync'd for the next checkpoint, and so a system
27- * restart needs to be forced.)
24+ * should be killed by SIGQUIT and then a recovery cycle started.
2825 *
2926 *
3027 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
4845#include "pgstat.h"
4946#include "postmaster/bgwriter.h"
5047#include "storage/bufmgr.h"
48+ #include "storage/buf_internals.h"
5149#include "storage/ipc.h"
5250#include "storage/lwlock.h"
5351#include "storage/pmsignal.h"
6664int BgWriterDelay = 200 ;
6765
6866/*
69- * Time to sleep between bgwriter rounds, when it has no work to do.
67+ * Multiplier to apply to BgWriterDelay when we decide to hibernate.
68+ * (Perhaps this needs to be configurable?)
7069 */
71- #define BGWRITER_HIBERNATE_MS 10000
70+ #define HIBERNATE_FACTOR 50
7271
7372/*
7473 * Flags set by interrupt handlers for later service in the main loop.
@@ -81,10 +80,6 @@ static volatile sig_atomic_t shutdown_requested = false;
8180 */
8281static bool am_bg_writer = false;
8382
84- /* Prototypes for private functions */
85-
86- static void BgWriterNap (bool hibernating );
87-
8883/* Signal handlers */
8984
9085static void bg_quickdie (SIGNAL_ARGS );
@@ -104,7 +99,7 @@ BackgroundWriterMain(void)
10499{
105100sigjmp_buf local_sigjmp_buf ;
106101MemoryContext bgwriter_context ;
107- bool hibernating ;
102+ bool prev_hibernate ;
108103
109104am_bg_writer = true;
110105
@@ -126,7 +121,7 @@ BackgroundWriterMain(void)
126121 * handler is still needed for latch wakeups.
127122 */
128123pqsignal (SIGHUP ,BgSigHupHandler );/* set flag to read config file */
129- pqsignal (SIGINT ,SIG_IGN );/* as of 9.2 no longer requests checkpoint */
124+ pqsignal (SIGINT ,SIG_IGN );
130125pqsignal (SIGTERM ,ReqShutdownHandler );/* shutdown */
131126pqsignal (SIGQUIT ,bg_quickdie );/* hard crash time */
132127pqsignal (SIGALRM ,SIG_IGN );
@@ -146,12 +141,6 @@ BackgroundWriterMain(void)
146141/* We allow SIGQUIT (quickdie) at all times */
147142sigdelset (& BlockSig ,SIGQUIT );
148143
149- /*
150- * Advertise our latch that backends can use to wake us up while we're
151- * sleeping.
152- */
153- ProcGlobal -> bgwriterLatch = & MyProc -> procLatch ;
154-
155144/*
156145 * Create a resource owner to keep track of our resources (currently only
157146 * buffer pins).
@@ -246,26 +235,26 @@ BackgroundWriterMain(void)
246235if (RecoveryInProgress ())
247236ThisTimeLineID = GetRecoveryTargetTLI ();
248237
238+ /*
239+ * Reset hibernation state after any error.
240+ */
241+ prev_hibernate = false;
242+
249243/*
250244 * Loop forever
251245 */
252- hibernating = false;
253246for (;;)
254247{
255- bool lapped ;
248+ bool can_hibernate ;
249+ int rc ;
256250
257- /*
258- * Emergency bailout if postmaster has died. This is to avoid the
259- * necessity for manual cleanup of all postmaster children.
260- */
261- if (!PostmasterIsAlive ())
262- exit (1 );
251+ /* Clear any already-pending wakeups */
252+ ResetLatch (& MyProc -> procLatch );
263253
264254if (got_SIGHUP )
265255{
266256got_SIGHUP = false;
267257ProcessConfigFile (PGC_SIGHUP );
268- /* update global shmem state for sync rep */
269258}
270259if (shutdown_requested )
271260{
@@ -281,126 +270,69 @@ BackgroundWriterMain(void)
281270/*
282271 * Do one cycle of dirty-buffer writing.
283272 */
284- if (hibernating && bgwriter_lru_maxpages > 0 )
285- ResetLatch (& MyProc -> procLatch );
286- lapped = BgBufferSync ();
287-
288- if (lapped && !hibernating )
289- {
290- /*
291- * BgBufferSync did nothing. Since there doesn't seem to be any
292- * work for the bgwriter to do, go into slower-paced
293- * "hibernation" mode, where we sleep for much longer times than
294- * bgwriter_delay says. Fewer wakeups saves electricity. If a
295- * backend starts dirtying pages again, it will wake us up by
296- * setting our latch.
297- *
298- * The latch is kept set during productive cycles where buffers
299- * are written, and only reset before going into a longer sleep.
300- * That ensures that when there's a constant trickle of activity,
301- * the SetLatch() calls that backends have to do will see the
302- * latch as already set, and are not slowed down by having to
303- * actually set the latch and signal us.
304- */
305- hibernating = true;
306-
307- /*
308- * Take one more short nap and perform one more bgwriter cycle -
309- * someone might've dirtied a buffer just after we finished the
310- * previous bgwriter cycle, while the latch was still set. If
311- * we still find nothing to do after this cycle, the next sleep
312- * will be longer.
313- */
314- BgWriterNap (false);
315- continue ;
316- }
317- else if (!lapped && hibernating )
318- {
319- /*
320- * Woken up from hibernation. Set the latch just in case it's
321- * not set yet (usually we wake up from hibernation because a
322- * backend already set the latch, but not necessarily).
323- */
324- SetLatch (& MyProc -> procLatch );
325- hibernating = false;
326- }
273+ can_hibernate = BgBufferSync ();
327274
328275/*
329- * Take a short or long nap, depending on whether there was any work
330- * to do.
276+ * Send off activity statistics to the stats collector
331277 */
332- BgWriterNap (hibernating );
333- }
334- }
278+ pgstat_send_bgwriter ();
335279
336- /*
337- * BgWriterNap -- Nap for the configured time or until a signal is received.
338- *
339- * If 'hibernating' is false, sleeps for bgwriter_delay milliseconds.
340- * Otherwise sleeps longer, but also wakes up if the process latch is set.
341- */
342- static void
343- BgWriterNap (bool hibernating )
344- {
345- long udelay ;
346-
347- /*
348- * Send off activity statistics to the stats collector
349- */
350- pgstat_send_bgwriter ();
351-
352- /*
353- * If there was no work to do in the previous bgwriter cycle, take a
354- * longer nap.
355- */
356- if (hibernating )
357- {
358280/*
359- * We wake on a buffer being dirtied. It's possible that some
360- * useful work will become available for the bgwriter to do without
361- * a buffer actually being dirtied, like when a dirty buffer's usage
362- * count is decremented to zero or it's unpinned. This corner case
363- * is judged as too marginal to justify adding additional SetLatch()
364- * calls in very hot code paths, cheap though those calls may be.
281+ * Sleep until we are signaled or BgWriterDelay has elapsed.
365282 *
366- * We still wake up periodically, so that BufferAlloc stats are
367- * updated reasonably promptly.
283+ * Note: the feedback control loop in BgBufferSync() expects that we
284+ * will call it every BgWriterDelay msec. While it's not critical for
285+ * correctness that that be exact, the feedback loop might misbehave
286+ * if we stray too far from that. Hence, avoid loading this process
287+ * down with latch events that are likely to happen frequently during
288+ * normal operation.
368289 */
369- int res = WaitLatch (& MyProc -> procLatch ,
370- WL_LATCH_SET |WL_TIMEOUT |WL_POSTMASTER_DEATH ,
371- BGWRITER_HIBERNATE_MS );
290+ rc = WaitLatch (& MyProc -> procLatch ,
291+ WL_LATCH_SET |WL_TIMEOUT |WL_POSTMASTER_DEATH ,
292+ BgWriterDelay /* ms */ );
372293
373294/*
374- * Only do a quick return if timeout was reached (or postmaster died)
375- * to ensure that no less than BgWriterDelay ms has passed between
376- * BgBufferSyncs - WaitLatch() might have returned instantaneously.
295+ * If no latch event and BgBufferSync says nothing's happening, extend
296+ * the sleep in "hibernation" mode, where we sleep for much longer
297+ * than bgwriter_delay says. Fewer wakeups save electricity. When a
298+ * backend starts using buffers again, it will wake us up by setting
299+ * our latch. Because the extra sleep will persist only as long as no
300+ * buffer allocations happen, this should not distort the behavior of
301+ * BgBufferSync's control loop too badly; essentially, it will think
302+ * that the system-wide idle interval didn't exist.
303+ *
304+ * There is a race condition here, in that a backend might allocate a
305+ * buffer between the time BgBufferSync saw the alloc count as zero
306+ * and the time we call StrategyNotifyBgWriter. While it's not
307+ * critical that we not hibernate anyway, we try to reduce the odds of
308+ * that by only hibernating when BgBufferSync says nothing's happening
309+ * for two consecutive cycles. Also, we mitigate any possible
310+ * consequences of a missed wakeup by not hibernating forever.
377311 */
378- if (res & (WL_TIMEOUT |WL_POSTMASTER_DEATH ))
379- return ;
380- }
312+ if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate )
313+ {
314+ /* Ask for notification at next buffer allocation */
315+ StrategyNotifyBgWriter (& MyProc -> procLatch );
316+ /* Sleep ... */
317+ rc = WaitLatch (& MyProc -> procLatch ,
318+ WL_LATCH_SET |WL_TIMEOUT |WL_POSTMASTER_DEATH ,
319+ BgWriterDelay * HIBERNATE_FACTOR );
320+ /* Reset the notification request in case we timed out */
321+ StrategyNotifyBgWriter (NULL );
322+ }
381323
382- /*
383- * Nap for the configured time.
384- *
385- * On some platforms, signals won't interrupt the sleep. To ensure we
386- * respond reasonably promptly when someone signals us, break down the
387- * sleep into 1-second increments, and check for interrupts after each
388- * nap.
389- */
390- udelay = BgWriterDelay * 1000L ;
324+ /*
325+ * Emergency bailout if postmaster has died. This is to avoid the
326+ * necessity for manual cleanup of all postmaster children.
327+ */
328+ if (rc & WL_POSTMASTER_DEATH )
329+ exit (1 );
391330
392- while (udelay > 999999L )
393- {
394- if (got_SIGHUP || shutdown_requested )
395- break ;
396- pg_usleep (1000000L );
397- udelay -= 1000000L ;
331+ prev_hibernate = can_hibernate ;
398332}
399-
400- if (!(got_SIGHUP || shutdown_requested ))
401- pg_usleep (udelay );
402333}
403334
335+
404336/* --------------------------------
405337 *signal handler routines
406338 * --------------------------------