Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitab0ed61

Browse files
committed
Ignore server-side delays when enforcing wal_sender_timeout.
Healthy clients of servers having poor I/O performance, such asbuildfarm members hamster and tern, saw unexpected timeouts. Thatdisagreed with documentation. This fix adds one gettimeofday() callwhenever ProcessRepliesIfAny() finds no client reply messages.Back-patch to 9.4; the bug's symptom is rare and mild, and the code allmoved between 9.3 and 9.4.Discussion:https://postgr.es/m/20180826034600.GA1105084@rfd.leadboat.com
1 parent5758685 commitab0ed61

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

‎src/backend/replication/walsender.c

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,12 @@ static StringInfoData output_message;
161161
staticStringInfoDatareply_message;
162162
staticStringInfoDatatmpbuf;
163163

164+
/* Timestamp of last ProcessRepliesIfAny(). */
165+
staticTimestampTzlast_processing=0;
166+
164167
/*
165-
* Timestamp ofthelastreceipt of thereply from the standby. Set to 0 if
166-
* wal_sender_timeout doesn't need to be active.
168+
* Timestamp of lastProcessRepliesIfAny() that saw areply from the
169+
*standby. Set to 0 ifwal_sender_timeout doesn't need to be active.
167170
*/
168171
staticTimestampTzlast_reply_timestamp=0;
169172

@@ -240,8 +243,8 @@ static void ProcessStandbyReplyMessage(void);
240243
staticvoidProcessStandbyHSFeedbackMessage(void);
241244
staticvoidProcessRepliesIfAny(void);
242245
staticvoidWalSndKeepalive(boolrequestReply);
243-
staticvoidWalSndKeepaliveIfNecessary(TimestampTznow);
244-
staticvoidWalSndCheckTimeOut(TimestampTznow);
246+
staticvoidWalSndKeepaliveIfNecessary(void);
247+
staticvoidWalSndCheckTimeOut(void);
245248
staticlongWalSndComputeSleeptime(TimestampTznow);
246249
staticvoidWalSndPrepareWrite(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
247250
staticvoidWalSndWriteData(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
@@ -1202,18 +1205,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
12021205
/* Check for input from the client */
12031206
ProcessRepliesIfAny();
12041207

1205-
now=GetCurrentTimestamp();
1206-
12071208
/* die if timeout was reached */
1208-
WalSndCheckTimeOut(now);
1209+
WalSndCheckTimeOut();
12091210

12101211
/* Send keepalive if the time has come */
1211-
WalSndKeepaliveIfNecessary(now);
1212+
WalSndKeepaliveIfNecessary();
12121213

12131214
if (!pq_is_send_pending())
12141215
break;
12151216

1216-
sleeptime=WalSndComputeSleeptime(now);
1217+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
12171218

12181219
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
12191220
WL_SOCKET_WRITEABLE |WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1308,7 +1309,6 @@ WalSndWaitForWal(XLogRecPtr loc)
13081309
for (;;)
13091310
{
13101311
longsleeptime;
1311-
TimestampTznow;
13121312

13131313
/*
13141314
* Emergency bailout if postmaster has died. This is to avoid the
@@ -1393,13 +1393,11 @@ WalSndWaitForWal(XLogRecPtr loc)
13931393
!pq_is_send_pending())
13941394
break;
13951395

1396-
now=GetCurrentTimestamp();
1397-
13981396
/* die if timeout was reached */
1399-
WalSndCheckTimeOut(now);
1397+
WalSndCheckTimeOut();
14001398

14011399
/* Send keepalive if the time has come */
1402-
WalSndKeepaliveIfNecessary(now);
1400+
WalSndKeepaliveIfNecessary();
14031401

14041402
/*
14051403
* Sleep until something happens or we time out. Also wait for the
@@ -1408,7 +1406,7 @@ WalSndWaitForWal(XLogRecPtr loc)
14081406
* new WAL to be generated. (But if we have nothing to send, we don't
14091407
* want to wake on socket-writable.)
14101408
*/
1411-
sleeptime=WalSndComputeSleeptime(now);
1409+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
14121410

14131411
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
14141412
WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1605,6 +1603,8 @@ ProcessRepliesIfAny(void)
16051603
intr;
16061604
boolreceived= false;
16071605

1606+
last_processing=GetCurrentTimestamp();
1607+
16081608
for (;;)
16091609
{
16101610
pq_startmsgread();
@@ -1692,7 +1692,7 @@ ProcessRepliesIfAny(void)
16921692
*/
16931693
if (received)
16941694
{
1695-
last_reply_timestamp=GetCurrentTimestamp();
1695+
last_reply_timestamp=last_processing;
16961696
waiting_for_ping_response= false;
16971697
}
16981698
}
@@ -2071,10 +2071,18 @@ WalSndComputeSleeptime(TimestampTz now)
20712071

20722072
/*
20732073
* Check whether there have been responses by the client within
2074-
* wal_sender_timeout and shutdown if not.
2074+
* wal_sender_timeout and shutdown if not. Using last_processing as the
2075+
* reference point avoids counting server-side stalls against the client.
2076+
* However, a long server-side stall can make WalSndKeepaliveIfNecessary()
2077+
* postdate last_processing by more than wal_sender_timeout. If that happens,
2078+
* the client must reply almost immediately to avoid a timeout. This rarely
2079+
* affects the default configuration, under which clients spontaneously send a
2080+
* message every standby_message_timeout = wal_sender_timeout/6 = 10s. We
2081+
* could eliminate that problem by recognizing timeout expiration at
2082+
* wal_sender_timeout/2 after the keepalive.
20752083
*/
20762084
staticvoid
2077-
WalSndCheckTimeOut(TimestampTznow)
2085+
WalSndCheckTimeOut(void)
20782086
{
20792087
TimestampTztimeout;
20802088

@@ -2085,7 +2093,7 @@ WalSndCheckTimeOut(TimestampTz now)
20852093
timeout=TimestampTzPlusMilliseconds(last_reply_timestamp,
20862094
wal_sender_timeout);
20872095

2088-
if (wal_sender_timeout>0&&now >=timeout)
2096+
if (wal_sender_timeout>0&&last_processing >=timeout)
20892097
{
20902098
/*
20912099
* Since typically expiration of replication timeout means
@@ -2116,8 +2124,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
21162124
*/
21172125
for (;;)
21182126
{
2119-
TimestampTznow;
2120-
21212127
/*
21222128
* Emergency bailout if postmaster has died. This is to avoid the
21232129
* necessity for manual cleanup of all postmaster children.
@@ -2195,13 +2201,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
21952201
WalSndDone(send_data);
21962202
}
21972203

2198-
now=GetCurrentTimestamp();
2199-
22002204
/* Check for replication timeout. */
2201-
WalSndCheckTimeOut(now);
2205+
WalSndCheckTimeOut();
22022206

22032207
/* Send keepalive if the time has come */
2204-
WalSndKeepaliveIfNecessary(now);
2208+
WalSndKeepaliveIfNecessary();
22052209

22062210
/*
22072211
* We don't block if not caught up, unless there is unsent data
@@ -2219,7 +2223,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
22192223
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |WL_TIMEOUT |
22202224
WL_SOCKET_READABLE;
22212225

2222-
sleeptime=WalSndComputeSleeptime(now);
2226+
/*
2227+
* Use fresh timestamp, not last_processed, to reduce the chance
2228+
* of reaching wal_sender_timeout before sending a keepalive.
2229+
*/
2230+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
22232231

22242232
if (pq_is_send_pending())
22252233
wakeEvents |=WL_SOCKET_WRITEABLE;
@@ -3379,7 +3387,7 @@ WalSndKeepalive(bool requestReply)
33793387
* Send keepalive message if too much time has elapsed.
33803388
*/
33813389
staticvoid
3382-
WalSndKeepaliveIfNecessary(TimestampTznow)
3390+
WalSndKeepaliveIfNecessary(void)
33833391
{
33843392
TimestampTzping_time;
33853393

@@ -3400,7 +3408,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
34003408
*/
34013409
ping_time=TimestampTzPlusMilliseconds(last_reply_timestamp,
34023410
wal_sender_timeout /2);
3403-
if (now >=ping_time)
3411+
if (last_processing >=ping_time)
34043412
{
34053413
WalSndKeepalive(true);
34063414
waiting_for_ping_response= true;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp