Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit081e410

Browse files
committed
Ignore server-side delays when enforcing wal_sender_timeout.
Healthy clients of servers having poor I/O performance, such asbuildfarm members hamster and tern, saw unexpected timeouts. Thatdisagreed with documentation. This fix adds one gettimeofday() callwhenever ProcessRepliesIfAny() finds no client reply messages.Back-patch to 9.4; the bug's symptom is rare and mild, and the code allmoved between 9.3 and 9.4.Discussion:https://postgr.es/m/20180826034600.GA1105084@rfd.leadboat.com
1 parent4a9a5bb commit081e410

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

‎src/backend/replication/walsender.c

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,12 @@ static StringInfoData output_message;
154154
staticStringInfoDatareply_message;
155155
staticStringInfoDatatmpbuf;
156156

157+
/* Timestamp of last ProcessRepliesIfAny(). */
158+
staticTimestampTzlast_processing=0;
159+
157160
/*
158-
* Timestamp ofthelastreceipt of thereply from the standby. Set to 0 if
159-
* wal_sender_timeout doesn't need to be active.
161+
* Timestamp of lastProcessRepliesIfAny() that saw areply from the
162+
*standby. Set to 0 ifwal_sender_timeout doesn't need to be active.
160163
*/
161164
staticTimestampTzlast_reply_timestamp=0;
162165

@@ -213,8 +216,8 @@ static void ProcessStandbyReplyMessage(void);
213216
staticvoidProcessStandbyHSFeedbackMessage(void);
214217
staticvoidProcessRepliesIfAny(void);
215218
staticvoidWalSndKeepalive(boolrequestReply);
216-
staticvoidWalSndKeepaliveIfNecessary(TimestampTznow);
217-
staticvoidWalSndCheckTimeOut(TimestampTznow);
219+
staticvoidWalSndKeepaliveIfNecessary(void);
220+
staticvoidWalSndCheckTimeOut(void);
218221
staticlongWalSndComputeSleeptime(TimestampTznow);
219222
staticvoidWalSndPrepareWrite(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
220223
staticvoidWalSndWriteData(LogicalDecodingContext*ctx,XLogRecPtrlsn,TransactionIdxid,boollast_write);
@@ -1117,18 +1120,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11171120
/* Check for input from the client */
11181121
ProcessRepliesIfAny();
11191122

1120-
now=GetCurrentTimestamp();
1121-
11221123
/* die if timeout was reached */
1123-
WalSndCheckTimeOut(now);
1124+
WalSndCheckTimeOut();
11241125

11251126
/* Send keepalive if the time has come */
1126-
WalSndKeepaliveIfNecessary(now);
1127+
WalSndKeepaliveIfNecessary();
11271128

11281129
if (!pq_is_send_pending())
11291130
break;
11301131

1131-
sleeptime=WalSndComputeSleeptime(now);
1132+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
11321133

11331134
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
11341135
WL_SOCKET_WRITEABLE |WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1198,7 +1199,6 @@ WalSndWaitForWal(XLogRecPtr loc)
11981199
for (;;)
11991200
{
12001201
longsleeptime;
1201-
TimestampTznow;
12021202

12031203
/*
12041204
* Emergency bailout if postmaster has died. This is to avoid the
@@ -1283,13 +1283,11 @@ WalSndWaitForWal(XLogRecPtr loc)
12831283
!pq_is_send_pending())
12841284
break;
12851285

1286-
now=GetCurrentTimestamp();
1287-
12881286
/* die if timeout was reached */
1289-
WalSndCheckTimeOut(now);
1287+
WalSndCheckTimeOut();
12901288

12911289
/* Send keepalive if the time has come */
1292-
WalSndKeepaliveIfNecessary(now);
1290+
WalSndKeepaliveIfNecessary();
12931291

12941292
/*
12951293
* Sleep until something happens or we time out. Also wait for the
@@ -1298,7 +1296,7 @@ WalSndWaitForWal(XLogRecPtr loc)
12981296
* new WAL to be generated. (But if we have nothing to send, we don't
12991297
* want to wake on socket-writable.)
13001298
*/
1301-
sleeptime=WalSndComputeSleeptime(now);
1299+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
13021300

13031301
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |
13041302
WL_SOCKET_READABLE |WL_TIMEOUT;
@@ -1438,6 +1436,8 @@ ProcessRepliesIfAny(void)
14381436
intr;
14391437
boolreceived= false;
14401438

1439+
last_processing=GetCurrentTimestamp();
1440+
14411441
for (;;)
14421442
{
14431443
pq_startmsgread();
@@ -1525,7 +1525,7 @@ ProcessRepliesIfAny(void)
15251525
*/
15261526
if (received)
15271527
{
1528-
last_reply_timestamp=GetCurrentTimestamp();
1528+
last_reply_timestamp=last_processing;
15291529
waiting_for_ping_response= false;
15301530
}
15311531
}
@@ -1818,10 +1818,18 @@ WalSndComputeSleeptime(TimestampTz now)
18181818

18191819
/*
18201820
* Check whether there have been responses by the client within
1821-
* wal_sender_timeout and shutdown if not.
1821+
* wal_sender_timeout and shutdown if not. Using last_processing as the
1822+
* reference point avoids counting server-side stalls against the client.
1823+
* However, a long server-side stall can make WalSndKeepaliveIfNecessary()
1824+
* postdate last_processing by more than wal_sender_timeout. If that happens,
1825+
* the client must reply almost immediately to avoid a timeout. This rarely
1826+
* affects the default configuration, under which clients spontaneously send a
1827+
* message every standby_message_timeout = wal_sender_timeout/6 = 10s. We
1828+
* could eliminate that problem by recognizing timeout expiration at
1829+
* wal_sender_timeout/2 after the keepalive.
18221830
*/
18231831
staticvoid
1824-
WalSndCheckTimeOut(TimestampTznow)
1832+
WalSndCheckTimeOut(void)
18251833
{
18261834
TimestampTztimeout;
18271835

@@ -1832,7 +1840,7 @@ WalSndCheckTimeOut(TimestampTz now)
18321840
timeout=TimestampTzPlusMilliseconds(last_reply_timestamp,
18331841
wal_sender_timeout);
18341842

1835-
if (wal_sender_timeout>0&&now >=timeout)
1843+
if (wal_sender_timeout>0&&last_processing >=timeout)
18361844
{
18371845
/*
18381846
* Since typically expiration of replication timeout means
@@ -1863,8 +1871,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
18631871
*/
18641872
for (;;)
18651873
{
1866-
TimestampTznow;
1867-
18681874
/*
18691875
* Emergency bailout if postmaster has died. This is to avoid the
18701876
* necessity for manual cleanup of all postmaster children.
@@ -1942,13 +1948,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
19421948
WalSndDone(send_data);
19431949
}
19441950

1945-
now=GetCurrentTimestamp();
1946-
19471951
/* Check for replication timeout. */
1948-
WalSndCheckTimeOut(now);
1952+
WalSndCheckTimeOut();
19491953

19501954
/* Send keepalive if the time has come */
1951-
WalSndKeepaliveIfNecessary(now);
1955+
WalSndKeepaliveIfNecessary();
19521956

19531957
/*
19541958
* We don't block if not caught up, unless there is unsent data
@@ -1966,7 +1970,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
19661970
wakeEvents=WL_LATCH_SET |WL_POSTMASTER_DEATH |WL_TIMEOUT |
19671971
WL_SOCKET_READABLE;
19681972

1969-
sleeptime=WalSndComputeSleeptime(now);
1973+
/*
1974+
* Use fresh timestamp, not last_processed, to reduce the chance
1975+
* of reaching wal_sender_timeout before sending a keepalive.
1976+
*/
1977+
sleeptime=WalSndComputeSleeptime(GetCurrentTimestamp());
19701978

19711979
if (pq_is_send_pending())
19721980
wakeEvents |=WL_SOCKET_WRITEABLE;
@@ -3037,7 +3045,7 @@ WalSndKeepalive(bool requestReply)
30373045
* Send keepalive message if too much time has elapsed.
30383046
*/
30393047
staticvoid
3040-
WalSndKeepaliveIfNecessary(TimestampTznow)
3048+
WalSndKeepaliveIfNecessary(void)
30413049
{
30423050
TimestampTzping_time;
30433051

@@ -3058,7 +3066,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
30583066
*/
30593067
ping_time=TimestampTzPlusMilliseconds(last_reply_timestamp,
30603068
wal_sender_timeout /2);
3061-
if (now >=ping_time)
3069+
if (last_processing >=ping_time)
30623070
{
30633071
WalSndKeepalive(true);
30643072
waiting_for_ping_response= true;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp