Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1664c8b

Browse files
committed
Ignore server-side delays when enforcing wal_sender_timeout.
Healthy clients of servers having poor I/O performance, such asbuildfarm members hamster and tern, saw unexpected timeouts. Thatdisagreed with documentation. This fix adds one gettimeofday() callwhenever ProcessRepliesIfAny() finds no client reply messages.Back-patch to 9.4; the bug's symptom is rare and mild, and the code allmoved between 9.3 and 9.4.Discussion:https://postgr.es/m/20180826034600.GA1105084@rfd.leadboat.com
1 parent2c8cff5 commit1664c8b

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

‎src/backend/replication/walsender.c

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,12 @@ static StringInfoData output_message;
162162
staticStringInfoDatareply_message;
163163
staticStringInfoDatatmpbuf;
164164

165+
/* Timestamp of last ProcessRepliesIfAny(). */
166+
staticTimestampTzlast_processing=0;
167+
165168
/*
166-
* Timestamp ofthelastreceipt of thereply from the standby. Set to 0 if
167-
* wal_sender_timeout doesn't need to be active.
169+
* Timestamp of lastProcessRepliesIfAny() that saw areply from the
170+
*standby. Set to 0 ifwal_sender_timeout doesn't need to be active.
168171
*/
169172
staticTimestampTzlast_reply_timestamp=0;
170173

@@ -241,8 +244,8 @@ static void ProcessStandbyReplyMessage(void);
241244
staticvoidProcessStandbyHSFeedbackMessage(void);
242245
staticvoidProcessRepliesIfAny(void);
243246
staticvoidWalSndKeepalive(boolrequestReply);
244-
staticvoidWalSndKeepaliveIfNecessary(TimestampTznow);
245-
staticvoidWalSndCheckTimeOut(TimestampTznow);
247+
staticvoidWalSndKeepaliveIfNecessary(void);
248+
staticvoidWalSndCheckTimeOut(void);
246249
staticlongWalSndComputeSleeptime(TimestampTznow);
247250
staticvoidWalSndPrepareWrite(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
248251
staticvoidWalSndWriteData(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
@@ -1191,18 +1194,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11911194
/* Check for input from the client */
11921195
ProcessRepliesIfAny();
11931196

1194-
now=GetCurrentTimestamp();
1195-
11961197
/* die if timeout was reached */
1197-
WalSndCheckTimeOut(now);
1198+
WalSndCheckTimeOut();
11981199

11991200
/* Send keepalive if the time has come */
1200-
WalSndKeepaliveIfNecessary(now);
1201+
WalSndKeepaliveIfNecessary();
12011202

12021203
if (!pq_is_send_pending())
12031204
break;
12041205

1205-
sleeptime=WalSndComputeSleeptime(now);
1206+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
12061207

12071208
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
12081209
WL_SOCKET_WRITEABLE |WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1297,7 +1298,6 @@ WalSndWaitForWal(XLogRecPtr loc)
12971298
for (;;)
12981299
{
12991300
longsleeptime;
1300-
TimestampTznow;
13011301

13021302
/*
13031303
* Emergency bailout if postmaster has died. This is to avoid the
@@ -1382,13 +1382,11 @@ WalSndWaitForWal(XLogRecPtr loc)
13821382
!pq_is_send_pending())
13831383
break;
13841384

1385-
now=GetCurrentTimestamp();
1386-
13871385
/* die if timeout was reached */
1388-
WalSndCheckTimeOut(now);
1386+
WalSndCheckTimeOut();
13891387

13901388
/* Send keepalive if the time has come */
1391-
WalSndKeepaliveIfNecessary(now);
1389+
WalSndKeepaliveIfNecessary();
13921390

13931391
/*
13941392
* Sleep until something happens or we time out. Also wait for the
@@ -1397,7 +1395,7 @@ WalSndWaitForWal(XLogRecPtr loc)
13971395
* new WAL to be generated. (But if we have nothing to send, we don't
13981396
* want to wake on socket-writable.)
13991397
*/
1400-
sleeptime=WalSndComputeSleeptime(now);
1398+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
14011399

14021400
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
14031401
WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1594,6 +1592,8 @@ ProcessRepliesIfAny(void)
15941592
intr;
15951593
boolreceived= false;
15961594

1595+
last_processing=GetCurrentTimestamp();
1596+
15971597
for (;;)
15981598
{
15991599
pq_startmsgread();
@@ -1681,7 +1681,7 @@ ProcessRepliesIfAny(void)
16811681
*/
16821682
if (received)
16831683
{
1684-
last_reply_timestamp=GetCurrentTimestamp();
1684+
last_reply_timestamp=last_processing;
16851685
waiting_for_ping_response= false;
16861686
}
16871687
}
@@ -2060,10 +2060,18 @@ WalSndComputeSleeptime(TimestampTz now)
20602060

20612061
/*
20622062
* Check whether there have been responses by the client within
2063-
* wal_sender_timeout and shutdown if not.
2063+
* wal_sender_timeout and shutdown if not. Using last_processing as the
2064+
* reference point avoids counting server-side stalls against the client.
2065+
* However, a long server-side stall can make WalSndKeepaliveIfNecessary()
2066+
* postdate last_processing by more than wal_sender_timeout. If that happens,
2067+
* the client must reply almost immediately to avoid a timeout. This rarely
2068+
* affects the default configuration, under which clients spontaneously send a
2069+
* message every standby_message_timeout = wal_sender_timeout/6 = 10s. We
2070+
* could eliminate that problem by recognizing timeout expiration at
2071+
* wal_sender_timeout/2 after the keepalive.
20642072
*/
20652073
staticvoid
2066-
WalSndCheckTimeOut(TimestampTznow)
2074+
WalSndCheckTimeOut(void)
20672075
{
20682076
TimestampTztimeout;
20692077

@@ -2074,7 +2082,7 @@ WalSndCheckTimeOut(TimestampTz now)
20742082
timeout=TimestampTzPlusMilliseconds(last_reply_timestamp,
20752083
wal_sender_timeout);
20762084

2077-
if (wal_sender_timeout>0&&now >=timeout)
2085+
if (wal_sender_timeout>0&&last_processing >=timeout)
20782086
{
20792087
/*
20802088
* Since typically expiration of replication timeout means
@@ -2105,8 +2113,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
21052113
*/
21062114
for (;;)
21072115
{
2108-
TimestampTznow;
2109-
21102116
/*
21112117
* Emergency bailout if postmaster has died. This is to avoid the
21122118
* necessity for manual cleanup of all postmaster children.
@@ -2184,13 +2190,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
21842190
WalSndDone(send_data);
21852191
}
21862192

2187-
now=GetCurrentTimestamp();
2188-
21892193
/* Check for replication timeout. */
2190-
WalSndCheckTimeOut(now);
2194+
WalSndCheckTimeOut();
21912195

21922196
/* Send keepalive if the time has come */
2193-
WalSndKeepaliveIfNecessary(now);
2197+
WalSndKeepaliveIfNecessary();
21942198

21952199
/*
21962200
* We don't block if not caught up, unless there is unsent data
@@ -2208,7 +2212,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
22082212
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |WL_TIMEOUT |
22092213
WL_SOCKET_READABLE;
22102214

2211-
sleeptime=WalSndComputeSleeptime(now);
2215+
/*
2216+
* Use fresh timestamp, not last_processed, to reduce the chance
2217+
* of reaching wal_sender_timeout before sending a keepalive.
2218+
*/
2219+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
22122220

22132221
if (pq_is_send_pending())
22142222
wakeEvents |=WL_SOCKET_WRITEABLE;
@@ -3360,7 +3368,7 @@ WalSndKeepalive(bool requestReply)
33603368
* Send keepalive message if too much time has elapsed.
33613369
*/
33623370
staticvoid
3363-
WalSndKeepaliveIfNecessary(TimestampTznow)
3371+
WalSndKeepaliveIfNecessary(void)
33643372
{
33653373
TimestampTzping_time;
33663374

@@ -3381,7 +3389,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
33813389
*/
33823390
ping_time=TimestampTzPlusMilliseconds(last_reply_timestamp,
33833391
wal_sender_timeout /2);
3384-
if (now >=ping_time)
3392+
if (last_processing >=ping_time)
33853393
{
33863394
WalSndKeepalive(true);
33873395
waiting_for_ping_response= true;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp