Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ad458cf

Browse files
committed
Don't use O_DIRECT when writing WAL files if archiving or streaming is
enabled. Bypassing the kernel cache is counter-productive in that case, because the archiver/walsender process will read from the WAL file soon after it's written, and if it's not cached the read will cause a physical read, eating I/O bandwidth available on the WAL drive. Also, walreceiver process does unaligned writes, so disable O_DIRECT in walreceiver process for that reason too.
1 parent 94f610b commit ad458cf

File tree

4 files changed

+47
-26
lines changed

4 files changed

+47
-26
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.376 2010/02/19 01:04:03 itagaki Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.377 2010/02/19 10:51:03 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -2686,13 +2686,10 @@ XLogFileClose(void)
26862686
* WAL segment files will not be re-read in normal operation, so we advise
26872687
* the OS to release any cached pages. But do not do so if WAL archiving
26882688
* or streaming is active, because archiver and walsender process could use
2689-
* the cache to read the WAL segment. Also, don't bother with it if we
2690-
* are using O_DIRECT, since the kernel is presumably not caching in that
2691-
* case.
2689+
* the cache to read the WAL segment.
26922690
*/
26932691
#if defined(USE_POSIX_FADVISE)&& defined(POSIX_FADV_DONTNEED)
2694-
if (!XLogIsNeeded()&&
2695-
(get_sync_bit(sync_method)&PG_O_DIRECT)==0)
2692+
if (!XLogIsNeeded())
26962693
(void)posix_fadvise(openLogFile,0,0,POSIX_FADV_DONTNEED);
26972694
#endif
26982695

@@ -7652,10 +7649,29 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
76527649
staticint
76537650
get_sync_bit(intmethod)
76547651
{
7652+
into_direct_flag=0;
7653+
76557654
/* If fsync is disabled, never open in sync mode */
76567655
if (!enableFsync)
76577656
return0;
76587657

7658+
/*
7659+
* Optimize writes by bypassing kernel cache with O_DIRECT when using
7660+
* O_SYNC, O_DSYNC or O_FSYNC. But only if archiving and streaming are
7661+
* disabled, otherwise the archive command or walsender process will
7662+
* read the WAL soon after writing it, which is guaranteed to cause a
7663+
* physical read if we bypassed the kernel cache. We also skip the
7664+
* posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the
7665+
* same reason.
7666+
*
7667+
* Never use O_DIRECT in walreceiver process for similar reasons; the WAL
7668+
* written by walreceiver is normally read by the startup process soon
7669+
* after it's written. Also, walreceiver performs unaligned writes, which
7670+
* don't work with O_DIRECT, so it is required for correctness too.
7671+
*/
7672+
if (!XLogIsNeeded()&& !am_walreceiver)
7673+
o_direct_flag=PG_O_DIRECT;
7674+
76597675
switch (method)
76607676
{
76617677
/*
@@ -7670,11 +7686,11 @@ get_sync_bit(int method)
76707686
return0;
76717687
#ifdefOPEN_SYNC_FLAG
76727688
caseSYNC_METHOD_OPEN:
7673-
returnOPEN_SYNC_FLAG;
7689+
returnOPEN_SYNC_FLAG |o_direct_flag;
76747690
#endif
76757691
#ifdefOPEN_DATASYNC_FLAG
76767692
caseSYNC_METHOD_OPEN_DSYNC:
7677-
returnOPEN_DATASYNC_FLAG;
7693+
returnOPEN_DATASYNC_FLAG |o_direct_flag;
76787694
#endif
76797695
default:
76807696
/* can't happen (unless we are out of sync with option array) */

‎src/backend/replication/walreceiver.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
*
3131
* IDENTIFICATION
32-
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.4 2010/02/17 04:19:39 tgl Exp $
32+
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.5 2010/02/19 10:51:04 heikki Exp $
3333
*
3434
*-------------------------------------------------------------------------
3535
*/
@@ -50,6 +50,9 @@
5050
#include"utils/ps_status.h"
5151
#include"utils/resowner.h"
5252

53+
/* Global variable to indicate if this process is a walreceiver process */
54+
boolam_walreceiver;
55+
5356
/* libpqreceiver hooks to these when loaded */
5457
walrcv_connect_typewalrcv_connect=NULL;
5558
walrcv_receive_typewalrcv_receive=NULL;
@@ -158,6 +161,8 @@ WalReceiverMain(void)
158161
/* use volatile pointer to prevent code rearrangement */
159162
volatileWalRcvData*walrcv=WalRcv;
160163

164+
am_walreceiver= true;
165+
161166
/*
162167
* WalRcv should be set up already (if we are a backend, we inherit
163168
* this by fork() or EXEC_BACKEND mechanism from the postmaster).
@@ -424,16 +429,18 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
424429
booluse_existent;
425430

426431
/*
427-
* XLOG segment files will be re-read in recovery operation soon,
428-
* so we don't need to advise the OS to release any cache page.
432+
* fsync() and close current file before we switch to next one.
433+
* We would otherwise have to reopen this file to fsync it later
429434
*/
430435
if (recvFile >=0)
431436
{
437+
XLogWalRcvFlush();
438+
432439
/*
433-
* fsync() before we switch to next file. We would otherwise
434-
* have to reopen this file to fsync it later
440+
* XLOG segment files will be re-read by recovery in startup
441+
* process soon, so we don't advise the OS to release cache
442+
* pages associated with the file like XLogFileClose() does.
435443
*/
436-
XLogWalRcvFlush();
437444
if (close(recvFile)!=0)
438445
ereport(PANIC,
439446
(errcode_for_file_access(),
@@ -445,8 +452,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
445452
/* Create/use new log file */
446453
XLByteToSeg(recptr,recvId,recvSeg);
447454
use_existent= true;
448-
recvFile=XLogFileInit(recvId,recvSeg,
449-
&use_existent, true);
455+
recvFile=XLogFileInit(recvId,recvSeg,&use_existent, true);
450456
recvOff=0;
451457
}
452458

‎src/include/access/xlogdefs.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.25 2010/01/15 09:19:06 heikki Exp $
10+
* $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.26 2010/02/19 10:51:04 heikki Exp $
1111
*/
1212
#ifndefXLOG_DEFS_H
1313
#defineXLOG_DEFS_H
@@ -106,23 +106,20 @@ typedef uint32 TimeLineID;
106106
* configure determined whether fdatasync() is.
107107
*/
108108
#if defined(O_SYNC)
109-
#defineBARE_OPEN_SYNC_FLAGO_SYNC
109+
#defineOPEN_SYNC_FLAGO_SYNC
110110
#elif defined(O_FSYNC)
111-
#defineBARE_OPEN_SYNC_FLAGO_FSYNC
112-
#endif
113-
#ifdefBARE_OPEN_SYNC_FLAG
114-
#defineOPEN_SYNC_FLAG(BARE_OPEN_SYNC_FLAG | PG_O_DIRECT)
111+
#defineOPEN_SYNC_FLAGO_FSYNC
115112
#endif
116113

117114
#if defined(O_DSYNC)
118115
#if defined(OPEN_SYNC_FLAG)
119116
/* O_DSYNC is distinct? */
120-
#ifO_DSYNC!=BARE_OPEN_SYNC_FLAG
121-
#defineOPEN_DATASYNC_FLAG(O_DSYNC | PG_O_DIRECT)
117+
#ifO_DSYNC!=OPEN_SYNC_FLAG
118+
#defineOPEN_DATASYNC_FLAGO_DSYNC
122119
#endif
123120
#else/* !defined(OPEN_SYNC_FLAG) */
124121
/* Win32 only has O_DSYNC */
125-
#defineOPEN_DATASYNC_FLAG(O_DSYNC | PG_O_DIRECT)
122+
#defineOPEN_DATASYNC_FLAGO_DSYNC
126123
#endif
127124
#endif
128125

‎src/include/replication/walreceiver.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group
77
*
8-
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.6 2010/02/03 09:47:19 heikki Exp $
8+
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.7 2010/02/19 10:51:04 heikki Exp $
99
*
1010
*-------------------------------------------------------------------------
1111
*/
@@ -15,6 +15,8 @@
1515
#include"access/xlogdefs.h"
1616
#include"storage/spin.h"
1717

18+
externboolam_walreceiver;
19+
1820
/*
1921
* MAXCONNINFO: maximum size of a connection string.
2022
*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp