Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 61752af

Browse files
committed
Provide recovery_init_sync_method=syncfs.
Since commit 2ce439f we have opened every file in the data directory and called fsync() at the start of crash recovery. This can be very slow if there are many files, leading to field complaints of systems taking minutes or even hours to begin crash recovery.

Provide an alternative method, for Linux only, where we call syncfs() on every possibly different filesystem under the data directory. This is equivalent, but avoids faulting in potentially many inodes from potentially slow storage.

The new mode comes with some caveats, described in the documentation, so the default value for the new setting is "fsync", preserving the older behavior.

Reported-by: Michael Brown <michael.brown@discourse.org>
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Reviewed-by: Paul Guo <guopa@vmware.com>
Reviewed-by: Bruce Momjian <bruce@momjian.us>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: David Steele <david@pgmasters.net>
Discussion: https://postgr.es/m/11bc2bb7-ecb5-3ad0-b39f-df632734cd81%40discourse.org
Discussion: https://postgr.es/m/CAEET0ZHGnbXmi8yF3ywsDZvb3m9CbdsGZgfTXscQ6agcbzcZAw%40mail.gmail.com
1 parent b822ae1 commit 61752af

File tree

9 files changed

+129
-2
lines changed

9 files changed

+129
-2
lines changed

‎configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15409,7 +15409,7 @@ fi
1540915409
LIBS_including_readline="$LIBS"
1541015410
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
1541115411

15412-
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
15412+
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink syncfs sync_file_range uselocale wcstombs_l writev
1541315413
do :
1541415414
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
1541515415
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"

‎configure.ac

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,7 @@ AC_CHECK_FUNCS(m4_normalize([
17011701
strchrnul
17021702
strsignal
17031703
symlink
1704+
syncfs
17041705
sync_file_range
17051706
uselocale
17061707
wcstombs_l

‎doc/src/sgml/config.sgml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9721,6 +9721,41 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
97219721
</listitem>
97229722
</varlistentry>
97239723

9724+
<varlistentry id="guc-recovery-init-sync-method" xreflabel="recovery_init_sync_method">
9725+
<term><varname>recovery_init_sync_method</varname> (<type>enum</type>)
9726+
<indexterm>
9727+
<primary><varname>recovery_init_sync_method</varname> configuration parameter</primary>
9728+
</indexterm>
9729+
</term>
9730+
<listitem>
9731+
<para>
9732+
When set to <literal>fsync</literal>, which is the default,
9733+
<productname>PostgreSQL</productname> will recursively open and
9734+
synchronize all files in the data directory before crash recovery
9735+
begins. The search for files will follow symbolic links for the WAL
9736+
directory and each configured tablespace (but not any other symbolic
9737+
links). This is intended to make sure that all WAL and data files are
9738+
durably stored on disk before replaying changes. This applies whenever
9739+
starting a database cluster that did not shut down cleanly, including
9740+
copies created with <application>pg_basebackup</application>.
9741+
</para>
9742+
<para>
9743+
On Linux, <literal>syncfs</literal> may be used instead, to ask the
9744+
operating system to synchronize the whole file systems that contain the
9745+
data directory, the WAL files and each tablespace (but not any other
9746+
file systems that may be reachable through symbolic links). This may
9747+
be a lot faster than the <literal>fsync</literal> setting, because it
9748+
doesn't need to open each file one by one. On the other hand, it may
9749+
be slower if a file system is shared by other applications that
9750+
modify a lot of files, since those files will also be written to disk.
9751+
Furthermore, on versions of Linux before 5.8, I/O errors encountered
9752+
while writing data to disk may not be reported to
9753+
<productname>PostgreSQL</productname>, and relevant error messages may
9754+
appear only in kernel logs.
9755+
</para>
9756+
</listitem>
9757+
</varlistentry>
9758+
97249759
</variablelist>
97259760

97269761
</sect1>

‎src/backend/storage/file/fd.c

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,11 @@
7272

7373
#include"postgres.h"
7474

75+
#include<dirent.h>
7576
#include<sys/file.h>
7677
#include<sys/param.h>
7778
#include<sys/stat.h>
79+
#include<sys/types.h>
7880
#ifndefWIN32
7981
#include<sys/mman.h>
8082
#endif
@@ -158,6 +160,9 @@ intmax_safe_fds = FD_MINFREE;/* default if not changed */
158160
/* Whether it is safe to continue running after fsync() fails. */
159161
booldata_sync_retry= false;
160162

163+
/* How SyncDataDirectory() should do its job. */
164+
intrecovery_init_sync_method=RECOVERY_INIT_SYNC_METHOD_FSYNC;
165+
161166
/* Debugging.... */
162167

163168
#ifdefFDDEBUG
@@ -3265,9 +3270,31 @@ looks_like_temp_rel_name(const char *name)
32653270
return true;
32663271
}
32673272

3273+
#ifdefHAVE_SYNCFS
3274+
staticvoid
3275+
do_syncfs(constchar*path)
3276+
{
3277+
intfd;
3278+
3279+
fd=OpenTransientFile(path,O_RDONLY);
3280+
if (fd<0)
3281+
{
3282+
ereport(LOG,
3283+
(errcode_for_file_access(),
3284+
errmsg("could not open %s: %m",path)));
3285+
return;
3286+
}
3287+
if (syncfs(fd)<0)
3288+
ereport(LOG,
3289+
(errcode_for_file_access(),
3290+
errmsg("could not sync filesystem for \"%s\": %m",path)));
3291+
CloseTransientFile(fd);
3292+
}
3293+
#endif
32683294

32693295
/*
3270-
* Issue fsync recursively on PGDATA and all its contents.
3296+
* Issue fsync recursively on PGDATA and all its contents, or issue syncfs for
3297+
* all potential filesystems, depending on the recovery_init_sync_method setting.
32713298
*
32723299
* We fsync regular files and directories wherever they are, but we
32733300
* follow symlinks only for pg_wal and immediately under pg_tblspc.
@@ -3319,6 +3346,42 @@ SyncDataDirectory(void)
33193346
xlog_is_symlink= true;
33203347
#endif
33213348

3349+
#ifdefHAVE_SYNCFS
3350+
if (recovery_init_sync_method==RECOVERY_INIT_SYNC_METHOD_SYNCFS)
3351+
{
3352+
DIR*dir;
3353+
structdirent*de;
3354+
3355+
/*
3356+
* On Linux, we don't have to open every single file one by one. We
3357+
* can use syncfs() to sync whole filesystems. We only expect
3358+
* filesystem boundaries to exist where we tolerate symlinks, namely
3359+
* pg_wal and the tablespaces, so we call syncfs() for each of those
3360+
* directories.
3361+
*/
3362+
3363+
/* Sync the top level pgdata directory. */
3364+
do_syncfs(".");
3365+
/* If any tablespaces are configured, sync each of those. */
3366+
dir=AllocateDir("pg_tblspc");
3367+
while ((de=ReadDirExtended(dir,"pg_tblspc",LOG)))
3368+
{
3369+
charpath[MAXPGPATH];
3370+
3371+
if (strcmp(de->d_name,".")==0||strcmp(de->d_name,"..")==0)
3372+
continue;
3373+
3374+
snprintf(path,MAXPGPATH,"pg_tblspc/%s",de->d_name);
3375+
do_syncfs(path);
3376+
}
3377+
FreeDir(dir);
3378+
/* If pg_wal is a symlink, process that too. */
3379+
if (xlog_is_symlink)
3380+
do_syncfs("pg_wal");
3381+
return;
3382+
}
3383+
#endif /* HAVE_SYNCFS */
3384+
33223385
/*
33233386
* If possible, hint to the kernel that we're soon going to fsync the data
33243387
* directory and its contents. Errors in this step are even less

‎src/backend/utils/misc/guc.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,14 @@ const struct config_enum_entry ssl_protocol_versions_info[] = {
488488
StaticAssertDecl(lengthof(ssl_protocol_versions_info)== (PG_TLS1_3_VERSION+2),
489489
"array length mismatch");
490490

491+
staticstructconfig_enum_entryrecovery_init_sync_method_options[]= {
492+
{"fsync",RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
493+
#ifdefHAVE_SYNCFS
494+
{"syncfs",RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
495+
#endif
496+
{NULL,0, false}
497+
};
498+
491499
staticstructconfig_enum_entryshared_memory_options[]= {
492500
#ifndefWIN32
493501
{"sysv",SHMEM_TYPE_SYSV, false},
@@ -4871,6 +4879,15 @@ static struct config_enum ConfigureNamesEnum[] =
48714879
NULL,NULL,NULL
48724880
},
48734881

4882+
{
4883+
{"recovery_init_sync_method",PGC_POSTMASTER,ERROR_HANDLING_OPTIONS,
4884+
gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
4885+
},
4886+
&recovery_init_sync_method,
4887+
RECOVERY_INIT_SYNC_METHOD_FSYNC,recovery_init_sync_method_options,
4888+
NULL,NULL,NULL
4889+
},
4890+
48744891
/* End-of-list marker */
48754892
{
48764893
{NULL,0,0,NULL,NULL},NULL,0,NULL,NULL,NULL,NULL

‎src/backend/utils/misc/postgresql.conf.sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,7 @@
761761
#restart_after_crash = on# reinitialize after backend crash?
762762
#remove_temp_files_after_crash = on# remove temporary files after
763763
# backend crash?
764+
#recovery_init_sync_method = fsync# fsync, syncfs (Linux 5.8+)
764765
#data_sync_retry = off# retry or panic on failure to fsync
765766
# data?
766767
# (change requires restart)

‎src/include/pg_config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,9 @@
590590
/* Define to 1 if you have the `symlink' function. */
591591
#undef HAVE_SYMLINK
592592

593+
/* Define to 1 if you have the `syncfs' function. */
594+
#undef HAVE_SYNCFS
595+
593596
/* Define to 1 if you have the `sync_file_range' function. */
594597
#undef HAVE_SYNC_FILE_RANGE
595598

‎src/include/storage/fd.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@
4545

4646
#include<dirent.h>
4747

48+
typedefenumRecoveryInitSyncMethod {
49+
RECOVERY_INIT_SYNC_METHOD_FSYNC,
50+
RECOVERY_INIT_SYNC_METHOD_SYNCFS
51+
}RecoveryInitSyncMethod;
52+
4853
structiovec;/* avoid including port/pg_iovec.h here */
4954

5055
typedefintFile;
@@ -53,6 +58,7 @@ typedef int File;
5358
/* GUC parameter */
5459
externPGDLLIMPORTintmax_files_per_process;
5560
externPGDLLIMPORTbooldata_sync_retry;
61+
externintrecovery_init_sync_method;
5662

5763
/*
5864
* This is private to fd.c, but exported for save/restore_backend_variables()

‎src/tools/msvc/Solution.pm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ sub GenerateFiles
388388
HAVE_STRUCT_TM_TM_ZONE=>undef,
389389
HAVE_SYNC_FILE_RANGE=>undef,
390390
HAVE_SYMLINK=> 1,
391+
HAVE_SYNCFS=>undef,
391392
HAVE_SYSLOG=>undef,
392393
HAVE_SYS_EPOLL_H=>undef,
393394
HAVE_SYS_EVENT_H=>undef,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp