Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitee0ab27

Browse files
committed
Ignore server-side delays when enforcing wal_sender_timeout.
Healthy clients of servers having poor I/O performance, such asbuildfarm members hamster and tern, saw unexpected timeouts. Thatdisagreed with documentation. This fix adds one gettimeofday() callwhenever ProcessRepliesIfAny() finds no client reply messages.Back-patch to 9.4; the bug's symptom is rare and mild, and the code allmoved between 9.3 and 9.4.Discussion:https://postgr.es/m/20180826034600.GA1105084@rfd.leadboat.com
1 parent1f349aa commitee0ab27

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

‎src/backend/replication/walsender.c

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,12 @@ static StringInfoData output_message;
162162
staticStringInfoDatareply_message;
163163
staticStringInfoDatatmpbuf;
164164

165+
/* Timestamp of last ProcessRepliesIfAny(). */
166+
staticTimestampTzlast_processing=0;
167+
165168
/*
166-
* Timestamp ofthelastreceipt of thereply from the standby. Set to 0 if
167-
* wal_sender_timeout doesn't need to be active.
169+
* Timestamp of lastProcessRepliesIfAny() that saw areply from the
170+
*standby. Set to 0 ifwal_sender_timeout doesn't need to be active.
168171
*/
169172
staticTimestampTzlast_reply_timestamp=0;
170173

@@ -241,8 +244,8 @@ static void ProcessStandbyReplyMessage(void);
241244
staticvoidProcessStandbyHSFeedbackMessage(void);
242245
staticvoidProcessRepliesIfAny(void);
243246
staticvoidWalSndKeepalive(boolrequestReply);
244-
staticvoidWalSndKeepaliveIfNecessary(TimestampTznow);
245-
staticvoidWalSndCheckTimeOut(TimestampTznow);
247+
staticvoidWalSndKeepaliveIfNecessary(void);
248+
staticvoidWalSndCheckTimeOut(void);
246249
staticlongWalSndComputeSleeptime(TimestampTznow);
247250
staticvoidWalSndPrepareWrite(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
248251
staticvoidWalSndWriteData(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
@@ -1195,18 +1198,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11951198
/* Check for input from the client */
11961199
ProcessRepliesIfAny();
11971200

1198-
now=GetCurrentTimestamp();
1199-
12001201
/* die if timeout was reached */
1201-
WalSndCheckTimeOut(now);
1202+
WalSndCheckTimeOut();
12021203

12031204
/* Send keepalive if the time has come */
1204-
WalSndKeepaliveIfNecessary(now);
1205+
WalSndKeepaliveIfNecessary();
12051206

12061207
if (!pq_is_send_pending())
12071208
break;
12081209

1209-
sleeptime=WalSndComputeSleeptime(now);
1210+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
12101211

12111212
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
12121213
WL_SOCKET_WRITEABLE |WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1301,7 +1302,6 @@ WalSndWaitForWal(XLogRecPtr loc)
13011302
for (;;)
13021303
{
13031304
longsleeptime;
1304-
TimestampTznow;
13051305

13061306
/*
13071307
* Emergency bailout if postmaster has died. This is to avoid the
@@ -1386,13 +1386,11 @@ WalSndWaitForWal(XLogRecPtr loc)
13861386
!pq_is_send_pending())
13871387
break;
13881388

1389-
now=GetCurrentTimestamp();
1390-
13911389
/* die if timeout was reached */
1392-
WalSndCheckTimeOut(now);
1390+
WalSndCheckTimeOut();
13931391

13941392
/* Send keepalive if the time has come */
1395-
WalSndKeepaliveIfNecessary(now);
1393+
WalSndKeepaliveIfNecessary();
13961394

13971395
/*
13981396
* Sleep until something happens or we time out. Also wait for the
@@ -1401,7 +1399,7 @@ WalSndWaitForWal(XLogRecPtr loc)
14011399
* new WAL to be generated. (But if we have nothing to send, we don't
14021400
* want to wake on socket-writable.)
14031401
*/
1404-
sleeptime=WalSndComputeSleeptime(now);
1402+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
14051403

14061404
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
14071405
WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1598,6 +1596,8 @@ ProcessRepliesIfAny(void)
15981596
intr;
15991597
boolreceived= false;
16001598

1599+
last_processing=GetCurrentTimestamp();
1600+
16011601
for (;;)
16021602
{
16031603
pq_startmsgread();
@@ -1685,7 +1685,7 @@ ProcessRepliesIfAny(void)
16851685
*/
16861686
if (received)
16871687
{
1688-
last_reply_timestamp=GetCurrentTimestamp();
1688+
last_reply_timestamp=last_processing;
16891689
waiting_for_ping_response= false;
16901690
}
16911691
}
@@ -2064,10 +2064,18 @@ WalSndComputeSleeptime(TimestampTz now)
20642064

20652065
/*
20662066
* Check whether there have been responses by the client within
2067-
* wal_sender_timeout and shutdown if not.
2067+
* wal_sender_timeout and shutdown if not. Using last_processing as the
2068+
* reference point avoids counting server-side stalls against the client.
2069+
* However, a long server-side stall can make WalSndKeepaliveIfNecessary()
2070+
* postdate last_processing by more than wal_sender_timeout. If that happens,
2071+
* the client must reply almost immediately to avoid a timeout. This rarely
2072+
* affects the default configuration, under which clients spontaneously send a
2073+
* message every standby_message_timeout = wal_sender_timeout/6 = 10s. We
2074+
* could eliminate that problem by recognizing timeout expiration at
2075+
* wal_sender_timeout/2 after the keepalive.
20682076
*/
20692077
staticvoid
2070-
WalSndCheckTimeOut(TimestampTznow)
2078+
WalSndCheckTimeOut(void)
20712079
{
20722080
TimestampTztimeout;
20732081

@@ -2078,7 +2086,7 @@ WalSndCheckTimeOut(TimestampTz now)
20782086
timeout=TimestampTzPlusMilliseconds(last_reply_timestamp,
20792087
wal_sender_timeout);
20802088

2081-
if (wal_sender_timeout>0&&now >=timeout)
2089+
if (wal_sender_timeout>0&&last_processing >=timeout)
20822090
{
20832091
/*
20842092
* Since typically expiration of replication timeout means
@@ -2109,8 +2117,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
21092117
*/
21102118
for (;;)
21112119
{
2112-
TimestampTznow;
2113-
21142120
/*
21152121
* Emergency bailout if postmaster has died. This is to avoid the
21162122
* necessity for manual cleanup of all postmaster children.
@@ -2188,13 +2194,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
21882194
WalSndDone(send_data);
21892195
}
21902196

2191-
now=GetCurrentTimestamp();
2192-
21932197
/* Check for replication timeout. */
2194-
WalSndCheckTimeOut(now);
2198+
WalSndCheckTimeOut();
21952199

21962200
/* Send keepalive if the time has come */
2197-
WalSndKeepaliveIfNecessary(now);
2201+
WalSndKeepaliveIfNecessary();
21982202

21992203
/*
22002204
* We don't block if not caught up, unless there is unsent data
@@ -2212,7 +2216,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
22122216
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |WL_TIMEOUT |
22132217
WL_SOCKET_READABLE;
22142218

2215-
sleeptime=WalSndComputeSleeptime(now);
2219+
/*
2220+
* Use fresh timestamp, not last_processed, to reduce the chance
2221+
* of reaching wal_sender_timeout before sending a keepalive.
2222+
*/
2223+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
22162224

22172225
if (pq_is_send_pending())
22182226
wakeEvents |=WL_SOCKET_WRITEABLE;
@@ -3364,7 +3372,7 @@ WalSndKeepalive(bool requestReply)
33643372
* Send keepalive message if too much time has elapsed.
33653373
*/
33663374
staticvoid
3367-
WalSndKeepaliveIfNecessary(TimestampTznow)
3375+
WalSndKeepaliveIfNecessary(void)
33683376
{
33693377
TimestampTzping_time;
33703378

@@ -3385,7 +3393,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
33853393
*/
33863394
ping_time=TimestampTzPlusMilliseconds(last_reply_timestamp,
33873395
wal_sender_timeout /2);
3388-
if (now >=ping_time)
3396+
if (last_processing >=ping_time)
33893397
{
33903398
WalSndKeepalive(true);
33913399
waiting_for_ping_response= true;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp