Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2ce439f

Browse files
committed
Recursively fsync() the data directory after a crash.
Otherwise, if there's another crash, some writes from after the first crash might make it to disk while writes from before the crash fail to make it to disk. This could lead to data corruption. Back-patch to all supported versions. Abhijit Menon-Sen, reviewed by Andres Freund and slightly revised by me.
1 parent ec3d976, commit 2ce439f

File tree

3 files changed

+159
-0
lines changed

3 files changed

+159
-0
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,8 @@ static void WALInsertLockAcquireExclusive(void);
845845
staticvoidWALInsertLockRelease(void);
846846
staticvoidWALInsertLockUpdateInsertingAt(XLogRecPtrinsertingAt);
847847

848+
staticvoidfsync_pgdata(char*datadir);
849+
848850
/*
849851
* Insert an XLOG record represented by an already-constructed chain of data
850852
* chunks. This is a low-level routine; to construct the WAL record header
@@ -5910,6 +5912,18 @@ StartupXLOG(void)
59105912
(errmsg("database system was interrupted; last known up at %s",
59115913
str_time(ControlFile->time))));
59125914

5915+
/*
5916+
* If we previously crashed, there might be data which we had written,
5917+
* intending to fsync it, but which we had not actually fsync'd yet.
5918+
* Therefore, a power failure in the near future might cause earlier
5919+
* unflushed writes to be lost, even though more recent data written to
5920+
* disk from here on would be persisted. To avoid that, fsync the entire
5921+
* data directory.
5922+
*/
5923+
if (ControlFile->state!=DB_SHUTDOWNED&&
5924+
ControlFile->state!=DB_SHUTDOWNED_IN_RECOVERY)
5925+
fsync_pgdata(data_directory);
5926+
59135927
/* This is just to allow attaching to startup process with a debugger */
59145928
#ifdefXLOG_REPLAY_DELAY
59155929
if (ControlFile->state!=DB_SHUTDOWNED)
@@ -11123,3 +11137,31 @@ SetWalWriterSleeping(bool sleeping)
1112311137
XLogCtl->WalWriterSleeping=sleeping;
1112411138
SpinLockRelease(&XLogCtl->info_lck);
1112511139
}
11140+
11141+
/*
11142+
* Issue fsync recursively on PGDATA and all its contents.
11143+
*/
11144+
staticvoid
11145+
fsync_pgdata(char*datadir)
11146+
{
11147+
if (!enableFsync)
11148+
return;
11149+
11150+
/*
11151+
* If possible, hint to the kernel that we're soon going to fsync
11152+
* the data directory and its contents.
11153+
*/
11154+
#if defined(HAVE_SYNC_FILE_RANGE)|| \
11155+
(defined(USE_POSIX_FADVISE)&& defined(POSIX_FADV_DONTNEED))
11156+
walkdir(datadir,pre_sync_fname);
11157+
#endif
11158+
11159+
/*
11160+
* Now we do the fsync()s in the same order.
11161+
*
11162+
* It's important to fsync the destination directory itself as individual
11163+
* file fsyncs don't guarantee that the directory entry for the file is
11164+
* synced.
11165+
*/
11166+
walkdir(datadir,fsync_fname);
11167+
}

‎src/backend/storage/file/fd.c

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,3 +2439,118 @@ looks_like_temp_rel_name(const char *name)
24392439
return false;
24402440
return true;
24412441
}
2442+
2443+
/*
2444+
* Hint to the OS that it should get ready to fsync() this file.
2445+
*
2446+
* Adapted from pre_sync_fname in initdb.c
2447+
*/
2448+
void
2449+
pre_sync_fname(char*fname,boolisdir)
2450+
{
2451+
intfd;
2452+
2453+
fd=open(fname,O_RDONLY |PG_BINARY);
2454+
2455+
/*
2456+
* Some OSs don't allow us to open directories at all (Windows returns
2457+
* EACCES)
2458+
*/
2459+
if (fd<0&&isdir&& (errno==EISDIR||errno==EACCES))
2460+
return;
2461+
2462+
if (fd<0)
2463+
ereport(FATAL,
2464+
(errmsg("could not open file \"%s\" before fsync",
2465+
fname)));
2466+
2467+
pg_flush_data(fd,0,0);
2468+
2469+
close(fd);
2470+
}
2471+
2472+
/*
2473+
* walkdir: recursively walk a directory, applying the action to each
2474+
* regular file and directory (including the named directory itself)
2475+
* and following symbolic links.
2476+
*
2477+
* NB: There is another version of walkdir in initdb.c, but that version
2478+
* behaves differently with respect to symbolic links. Caveat emptor!
2479+
*/
2480+
void
2481+
walkdir(char*path,void (*action) (char*fname,boolisdir))
2482+
{
2483+
DIR*dir;
2484+
structdirent*de;
2485+
2486+
dir=AllocateDir(path);
2487+
while ((de=ReadDir(dir,path))!=NULL)
2488+
{
2489+
charsubpath[MAXPGPATH];
2490+
structstatfst;
2491+
2492+
CHECK_FOR_INTERRUPTS();
2493+
2494+
if (strcmp(de->d_name,".")==0||
2495+
strcmp(de->d_name,"..")==0)
2496+
continue;
2497+
2498+
snprintf(subpath,MAXPGPATH,"%s/%s",path,de->d_name);
2499+
2500+
if (lstat(subpath,&fst)<0)
2501+
ereport(ERROR,
2502+
(errcode_for_file_access(),
2503+
errmsg("could not stat file \"%s\": %m",subpath)));
2504+
2505+
if (S_ISREG(fst.st_mode))
2506+
(*action) (subpath, false);
2507+
elseif (S_ISDIR(fst.st_mode))
2508+
walkdir(subpath,action);
2509+
#ifndefWIN32
2510+
elseif (S_ISLNK(fst.st_mode))
2511+
#else
2512+
elseif (pg_win32_is_junction(subpath))
2513+
#endif
2514+
{
2515+
#if defined(HAVE_READLINK)|| defined(WIN32)
2516+
charlinkpath[MAXPGPATH];
2517+
intlen;
2518+
structstatlst;
2519+
2520+
len=readlink(subpath,linkpath,sizeof(linkpath)-1);
2521+
if (len<0)
2522+
ereport(ERROR,
2523+
(errcode_for_file_access(),
2524+
errmsg("could not read symbolic link \"%s\": %m",
2525+
subpath)));
2526+
2527+
if (len >=sizeof(linkpath)-1)
2528+
ereport(ERROR,
2529+
(errmsg("symbolic link \"%s\" target is too long",
2530+
subpath)));
2531+
2532+
linkpath[len]='\0';
2533+
2534+
if (lstat(linkpath,&lst)==0)
2535+
{
2536+
if (S_ISREG(lst.st_mode))
2537+
(*action) (linkpath, false);
2538+
elseif (S_ISDIR(lst.st_mode))
2539+
walkdir(subpath,action);
2540+
}
2541+
elseif (errno!=ENOENT)
2542+
ereport(ERROR,
2543+
(errcode_for_file_access(),
2544+
errmsg("could not stat file \"%s\": %m",linkpath)));
2545+
#else
2546+
ereport(WARNING,
2547+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2548+
errmsg("this platform does not support symbolic links; ignoring \"%s\"",
2549+
subpath)));
2550+
#endif
2551+
}
2552+
}
2553+
FreeDir(dir);
2554+
2555+
(*action) (path, true);
2556+
}

‎src/include/storage/fd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ extern intpg_fsync_writethrough(int fd);
114114
externintpg_fdatasync(intfd);
115115
externintpg_flush_data(intfd,off_toffset,off_tamount);
116116
externvoidfsync_fname(char*fname,boolisdir);
117+
externvoidpre_sync_fname(char*fname,boolisdir);
118+
externvoidwalkdir(char*path,void (*action) (char*fname,boolisdir));
117119

118120
/* Filename components for OpenTemporaryFile */
119121
#definePG_TEMP_FILES_DIR "pgsql_tmp"

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp