Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9ce346e

Browse files
committed
Report progress of startup operations that take a long time.
Users sometimes get concerned whe they start the server and itemits a few messages and then doesn't emit any more messages fora long time. Generally, what's happening is either that thesystem is taking a long time to apply WAL, or it's taking along time to reset unlogged relations, or it's taking a longtime to fsync the data directory, but it's not easy to tellwhich is the case.To fix that, add a new 'log_startup_progress_interval' setting,by default 10s. When an operation that is known to be potentiallylong-running takes more than this amount of time, we'll log astatus update each time this interval elapses.To avoid undesirable log chatter, don't log anything about WALreplay when in standby mode.Nitin Jadhav and Robert Haas, reviewed by Amul Sul, BharathRupireddy, Justin Pryzby, Michael Paquier, and Álvaro Herrera.Discussion:https://postgr.es/m/CA+TgmoaHQrgDFOBwgY16XCoMtXxsrVGFB2jNCvb7-ubuEe1MGg@mail.gmail.comDiscussion:https://postgr.es/m/CAMm1aWaHF7VE69572_OLQ+MgpT5RUiUDgF1x5RrtkJBLdpRj3Q@mail.gmail.com
1 parent732e667 commit9ce346e

File tree

10 files changed

+181
-3
lines changed

10 files changed

+181
-3
lines changed

‎doc/src/sgml/config.sgml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6549,6 +6549,30 @@ local0.* /var/log/postgresql
65496549
</listitem>
65506550
</varlistentry>
65516551

6552+
<varlistentry id="guc-log-startup-progress-interval" xreflabel="log_startup_progress_interval">
6553+
<term><varname>log_startup_progress_interval</varname> (<type>integer</type>)
6554+
<indexterm>
6555+
<primary><varname>log_startup_progress_interval</varname> configuration parameter</primary>
6556+
</indexterm>
6557+
</term>
6558+
<listitem>
6559+
<para>
6560+
Sets the amount of time after which the startup process will log
6561+
a message about a long-running operation that is still in progress,
6562+
as well as the interval between further progress messages for that
6563+
operation. This setting is applied separately to each operation.
6564+
For example, if syncing the data directory takes 25 seconds and
6565+
thereafter resetting unlogged relations takes 8 seconds, and if this
6566+
setting has the default value of 10 seconds, then a messages will be
6567+
logged for syncing the data directory after it has been in progress
6568+
for 10 seconds and again after it has been in progress for 20 seconds,
6569+
but nothing will be logged for resetting unlogged operations.
6570+
A setting of <literal>0</literal> disables the feature. If this value
6571+
is specified without units, it is taken as milliseconds.
6572+
</para>
6573+
</listitem>
6574+
</varlistentry>
6575+
65526576
</variablelist>
65536577

65546578
<para>

‎src/backend/access/transam/xlog.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
#include"utils/relmapper.h"
8080
#include"utils/pg_rusage.h"
8181
#include"utils/snapmgr.h"
82+
#include"utils/timeout.h"
8283
#include"utils/timestamp.h"
8384

8485
externuint32bootstrap_data_checksum_version;
@@ -6718,6 +6719,11 @@ StartupXLOG(void)
67186719
*/
67196720
ValidateXLOGDirectoryStructure();
67206721

6722+
/* Set up timeout handler needed to report startup progress. */
6723+
if (!IsBootstrapProcessingMode())
6724+
RegisterTimeout(STARTUP_PROGRESS_TIMEOUT,
6725+
startup_progress_timeout_handler);
6726+
67216727
/*----------
67226728
* If we previously crashed, perform a couple of actions:
67236729
*
@@ -7491,13 +7497,21 @@ StartupXLOG(void)
74917497
(errmsg("redo starts at %X/%X",
74927498
LSN_FORMAT_ARGS(ReadRecPtr))));
74937499

7500+
/* Prepare to report progress of the redo phase. */
7501+
if (!StandbyMode)
7502+
begin_startup_progress_phase();
7503+
74947504
/*
74957505
* main redo apply loop
74967506
*/
74977507
do
74987508
{
74997509
boolswitchedTLI= false;
75007510

7511+
if (!StandbyMode)
7512+
ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
7513+
LSN_FORMAT_ARGS(ReadRecPtr));
7514+
75017515
#ifdefWAL_DEBUG
75027516
if (XLOG_DEBUG||
75037517
(rmid==RM_XACT_ID&&trace_recovery_messages <=DEBUG2)||

‎src/backend/nls.mk

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
1010
yyerror\
1111
parser_yyerror\
1212
scanner_yyerror\
13-
report_invalid_record:2
13+
report_invalid_record:2\
14+
ereport_startup_progress
1415
GETTEXT_FLAGS =$(BACKEND_COMMON_GETTEXT_FLAGS)\
1516
GUC_check_errmsg:1:c-format\
1617
GUC_check_errdetail:1:c-format\
1718
GUC_check_errhint:1:c-format\
1819
write_stderr:1:c-format\
19-
report_invalid_record:2:c-format
20+
report_invalid_record:2:c-format\
21+
ereport_startup_progress:1:c-format
2022

2123
gettext-files: distprep
2224
find$(srcdir)$(srcdir)/../common$(srcdir)/../port -name'*.c' -print| LC_ALL=C sort>$@

‎src/backend/postmaster/startup.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ static volatile sig_atomic_t promote_signaled = false;
5959
*/
6060
staticvolatilesig_atomic_tin_restore_command= false;
6161

62+
/*
63+
* Time at which the most recent startup operation started.
64+
*/
65+
staticTimestampTzstartup_progress_phase_start_time;
66+
67+
/*
68+
* Indicates whether the startup progress interval mentioned by the user is
69+
* elapsed or not. TRUE if timeout occurred, FALSE otherwise.
70+
*/
71+
staticvolatilesig_atomic_tstartup_progress_timer_expired= false;
72+
73+
/*
74+
* Time between progress updates for long-running startup operations.
75+
*/
76+
intlog_startup_progress_interval=10000;/* 10 sec */
77+
6278
/* Signal handlers */
6379
staticvoidStartupProcTriggerHandler(SIGNAL_ARGS);
6480
staticvoidStartupProcSigHupHandler(SIGNAL_ARGS);
@@ -282,3 +298,58 @@ ResetPromoteSignaled(void)
282298
{
283299
promote_signaled= false;
284300
}
301+
302+
/*
303+
* Set a flag indicating that it's time to log a progress report.
304+
*/
305+
void
306+
startup_progress_timeout_handler(void)
307+
{
308+
startup_progress_timer_expired= true;
309+
}
310+
311+
/*
312+
* Set the start timestamp of the current operation and enable the timeout.
313+
*/
314+
void
315+
begin_startup_progress_phase(void)
316+
{
317+
TimestampTzfin_time;
318+
319+
/* Feature is disabled. */
320+
if (log_startup_progress_interval==0)
321+
return;
322+
323+
startup_progress_phase_start_time=GetCurrentTimestamp();
324+
fin_time=TimestampTzPlusMilliseconds(startup_progress_phase_start_time,
325+
log_startup_progress_interval);
326+
enable_timeout_every(STARTUP_PROGRESS_TIMEOUT,fin_time,
327+
log_startup_progress_interval);
328+
}
329+
330+
/*
331+
* Report whether startup progress timeout has occurred. Reset the timer flag
332+
* if it did, set the elapsed time to the out parameters and return true,
333+
* otherwise return false.
334+
*/
335+
bool
336+
has_startup_progress_timeout_expired(long*secs,int*usecs)
337+
{
338+
longseconds;
339+
intuseconds;
340+
TimestampTznow;
341+
342+
/* No timeout has occurred. */
343+
if (!startup_progress_timer_expired)
344+
return false;
345+
346+
/* Calculate the elapsed time. */
347+
now=GetCurrentTimestamp();
348+
TimestampDifference(startup_progress_phase_start_time,now,&seconds,&useconds);
349+
350+
*secs=seconds;
351+
*usecs=useconds;
352+
startup_progress_timer_expired= false;
353+
354+
return true;
355+
}

‎src/backend/storage/file/fd.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
#include"pgstat.h"
9797
#include"port/pg_iovec.h"
9898
#include"portability/mem.h"
99+
#include"postmaster/startup.h"
99100
#include"storage/fd.h"
100101
#include"storage/ipc.h"
101102
#include"utils/guc.h"
@@ -3381,6 +3382,9 @@ do_syncfs(const char *path)
33813382
{
33823383
intfd;
33833384

3385+
ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
3386+
path);
3387+
33843388
fd=OpenTransientFile(path,O_RDONLY);
33853389
if (fd<0)
33863390
{
@@ -3465,6 +3469,9 @@ SyncDataDirectory(void)
34653469
* directories.
34663470
*/
34673471

3472+
/* Prepare to report progress syncing the data directory via syncfs. */
3473+
begin_startup_progress_phase();
3474+
34683475
/* Sync the top level pgdata directory. */
34693476
do_syncfs(".");
34703477
/* If any tablespaces are configured, sync each of those. */
@@ -3487,18 +3494,24 @@ SyncDataDirectory(void)
34873494
}
34883495
#endif/* !HAVE_SYNCFS */
34893496

3497+
#ifdefPG_FLUSH_DATA_WORKS
3498+
/* Prepare to report progress of the pre-fsync phase. */
3499+
begin_startup_progress_phase();
3500+
34903501
/*
34913502
* If possible, hint to the kernel that we're soon going to fsync the data
34923503
* directory and its contents. Errors in this step are even less
34933504
* interesting than normal, so log them only at DEBUG1.
34943505
*/
3495-
#ifdefPG_FLUSH_DATA_WORKS
34963506
walkdir(".",pre_sync_fname, false,DEBUG1);
34973507
if (xlog_is_symlink)
34983508
walkdir("pg_wal",pre_sync_fname, false,DEBUG1);
34993509
walkdir("pg_tblspc",pre_sync_fname, true,DEBUG1);
35003510
#endif
35013511

3512+
/* Prepare to report progress syncing the data directory via fsync. */
3513+
begin_startup_progress_phase();
3514+
35023515
/*
35033516
* Now we do the fsync()s in the same order.
35043517
*
@@ -3601,6 +3614,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
36013614
if (isdir)
36023615
return;
36033616

3617+
ereport_startup_progress("syncing data directory (pre-fsync), elapsed time: %ld.%02d s, current path: %s",
3618+
fname);
3619+
36043620
fd=OpenTransientFile(fname,O_RDONLY |PG_BINARY);
36053621

36063622
if (fd<0)
@@ -3630,6 +3646,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
36303646
staticvoid
36313647
datadir_fsync_fname(constchar*fname,boolisdir,intelevel)
36323648
{
3649+
ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
3650+
fname);
3651+
36333652
/*
36343653
* We want to silently ignoring errors about unreadable files. Pass that
36353654
* desire on to fsync_fname_ext().

‎src/backend/storage/file/reinit.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include<unistd.h>
1818

1919
#include"common/relpath.h"
20+
#include"postmaster/startup.h"
2021
#include"storage/copydir.h"
2122
#include"storage/fd.h"
2223
#include"storage/reinit.h"
@@ -65,6 +66,9 @@ ResetUnloggedRelations(int op)
6566
ALLOCSET_DEFAULT_SIZES);
6667
oldctx=MemoryContextSwitchTo(tmpctx);
6768

69+
/* Prepare to report progress resetting unlogged relations. */
70+
begin_startup_progress_phase();
71+
6872
/*
6973
* First process unlogged files in pg_default ($PGDATA/base)
7074
*/
@@ -136,6 +140,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
136140

137141
snprintf(dbspace_path,sizeof(dbspace_path),"%s/%s",
138142
tsdirname,de->d_name);
143+
144+
if (op&UNLOGGED_RELATION_INIT)
145+
ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
146+
dbspace_path);
147+
elseif (op&UNLOGGED_RELATION_CLEANUP)
148+
ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
149+
dbspace_path);
150+
139151
ResetUnloggedRelationsInDbspaceDir(dbspace_path,op);
140152
}
141153

‎src/backend/utils/misc/guc.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include"postmaster/bgworker_internals.h"
7272
#include"postmaster/bgwriter.h"
7373
#include"postmaster/postmaster.h"
74+
#include"postmaster/startup.h"
7475
#include"postmaster/syslogger.h"
7576
#include"postmaster/walwriter.h"
7677
#include"replication/logicallauncher.h"
@@ -3570,6 +3571,18 @@ static struct config_int ConfigureNamesInt[] =
35703571
check_client_connection_check_interval,NULL,NULL
35713572
},
35723573

3574+
{
3575+
{"log_startup_progress_interval",PGC_SIGHUP,LOGGING_WHEN,
3576+
gettext_noop("Time between progress updates for "
3577+
"long-running startup operations."),
3578+
gettext_noop("0 turns this feature off."),
3579+
GUC_UNIT_MS,
3580+
},
3581+
&log_startup_progress_interval,
3582+
10000,0,INT_MAX,
3583+
NULL,NULL,NULL
3584+
},
3585+
35733586
/* End-of-list marker */
35743587
{
35753588
{NULL,0,0,NULL,NULL},NULL,0,0,0,NULL,NULL,NULL

‎src/backend/utils/misc/postgresql.conf.sample

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,11 @@
523523
# are logged regardless of their duration; 1.0 logs all
524524
# statements from all transactions, 0.0 never logs
525525

526+
#log_startup_progress_interval = 10s# Time between progress updates for
527+
# long-running startup operations.
528+
# 0 disables the feature, > 0 indicates
529+
# the interval in milliseconds.
530+
526531
# - What to Log -
527532

528533
#debug_print_parse = off

‎src/include/postmaster/startup.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,28 @@
1212
#ifndef_STARTUP_H
1313
#define_STARTUP_H
1414

15+
/*
16+
* Log the startup progress message if a timer has expired.
17+
*/
18+
#defineereport_startup_progress(msg, ...) \
19+
do { \
20+
long secs; \
21+
int usecs; \
22+
if (has_startup_progress_timeout_expired(&secs, &usecs)) \
23+
ereport(LOG, errmsg(msg, secs, (usecs / 10000), __VA_ARGS__ )); \
24+
} while(0)
25+
26+
externintlog_startup_progress_interval;
27+
1528
externvoidHandleStartupProcInterrupts(void);
1629
externvoidStartupProcessMain(void)pg_attribute_noreturn();
1730
externvoidPreRestoreCommand(void);
1831
externvoidPostRestoreCommand(void);
1932
externboolIsPromoteSignaled(void);
2033
externvoidResetPromoteSignaled(void);
2134

35+
externvoidbegin_startup_progress_phase(void);
36+
externvoidstartup_progress_timeout_handler(void);
37+
externboolhas_startup_progress_timeout_expired(long*secs,int*usecs);
38+
2239
#endif/* _STARTUP_H */

‎src/include/utils/timeout.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ typedef enum TimeoutId
3333
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
3434
IDLE_SESSION_TIMEOUT,
3535
CLIENT_CONNECTION_CHECK_TIMEOUT,
36+
STARTUP_PROGRESS_TIMEOUT,
3637
/* First user-definable timeout reason */
3738
USER_TIMEOUT,
3839
/* Maximum number of timeout reasons */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp