Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1ff92ee

Browse files
committed
Fix sloppiness in the timeline switch over streaming replication patch.
Here's another attempt at fixing the logic that decides how far the WAL canbe streamed, which was still broken if the timeline changed while streaming.You would get an assertion failure. The way the logic is now written is morereadable, too.Thom Brown reported the assertion failure.
1 parent36e4456 commit1ff92ee

File tree

1 file changed

+54
-47
lines changed

1 file changed

+54
-47
lines changed

‎src/backend/replication/walsender.c

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ static void WalSndLoop(void);
169169
staticvoidInitWalSenderSlot(void);
170170
staticvoidWalSndKill(intcode,Datumarg);
171171
staticvoidXLogSend(bool*caughtup);
172-
staticXLogRecPtrGetStandbyFlushRecPtr(TimeLineIDcurrentTLI);
172+
staticXLogRecPtrGetStandbyFlushRecPtr(void);
173173
staticvoidIdentifySystem(void);
174174
staticvoidStartReplication(StartReplicationCmd*cmd);
175175
staticvoidProcessStandbyMessage(void);
@@ -250,7 +250,7 @@ IdentifySystem(void)
250250
if (am_cascading_walsender)
251251
{
252252
/* this also updates ThisTimeLineID */
253-
logptr=GetStandbyFlushRecPtr(0);
253+
logptr=GetStandbyFlushRecPtr();
254254
}
255255
else
256256
logptr=GetInsertRecPtr();
@@ -423,7 +423,7 @@ StartReplication(StartReplicationCmd *cmd)
423423
if (am_cascading_walsender)
424424
{
425425
/* this also updates ThisTimeLineID */
426-
FlushPtr=GetStandbyFlushRecPtr(0);
426+
FlushPtr=GetStandbyFlushRecPtr();
427427
}
428428
else
429429
FlushPtr=GetFlushRecPtr();
@@ -1310,7 +1310,6 @@ static void
13101310
XLogSend(bool*caughtup)
13111311
{
13121312
XLogRecPtrSendRqstPtr;
1313-
XLogRecPtrFlushPtr;
13141313
XLogRecPtrstartptr;
13151314
XLogRecPtrendptr;
13161315
Sizenbytes;
@@ -1321,33 +1320,39 @@ XLogSend(bool *caughtup)
13211320
return;
13221321
}
13231322

1324-
/*
1325-
* Attempt to send all data that's already been written out and fsync'd to
1326-
* disk. We cannot go further than what's been written out given the
1327-
* current implementation of XLogRead(). And in any case it's unsafe to
1328-
* send WAL that is not securely down to disk on the master: if the master
1329-
* subsequently crashes and restarts, slaves must not have applied any WAL
1330-
* that gets lost on the master.
1331-
*/
1332-
if (am_cascading_walsender)
1333-
FlushPtr=GetStandbyFlushRecPtr(sendTimeLine);
1334-
else
1335-
FlushPtr=GetFlushRecPtr();
1336-
1337-
/*
1338-
* In a cascading standby, the current recovery target timeline can
1339-
* change, or we can be promoted. In either case, the current timeline
1340-
* becomes historic. We need to detect that so that we don't try to stream
1341-
* past the point where we switched to another timeline. It's checked
1342-
* after calculating FlushPtr, to avoid a race condition: if the timeline
1343-
* becomes historic just after we checked that it was still current, it
1344-
* should still be OK to stream it up to the FlushPtr that was calculated
1345-
* before it became historic.
1346-
*/
1347-
if (!sendTimeLineIsHistoric&&am_cascading_walsender)
1323+
/* Figure out how far we can safely send the WAL. */
1324+
if (sendTimeLineIsHistoric)
13481325
{
1326+
/*
1327+
* Streaming an old timeline timeline that's in this server's history,
1328+
* but is not the one we're currently inserting or replaying. It can
1329+
* be streamed up to the point where we switched off that timeline.
1330+
*/
1331+
SendRqstPtr=sendTimeLineValidUpto;
1332+
}
1333+
elseif (am_cascading_walsender)
1334+
{
1335+
/*
1336+
* Streaming the latest timeline on a standby.
1337+
*
1338+
* Attempt to send all WAL that has already been replayed, so that
1339+
* we know it's valid. If we're receiving WAL through streaming
1340+
* replication, it's also OK to send any WAL that has been received
1341+
* but not replayed.
1342+
*
1343+
* The timeline we're recovering from can change, or we can be
1344+
* promoted. In either case, the current timeline becomes historic.
1345+
* We need to detect that so that we don't try to stream past the
1346+
* point where we switched to another timeline. We check for promotion
1347+
* or timeline switch after calculating FlushPtr, to avoid a race
1348+
* condition: if the timeline becomes historic just after we checked
1349+
* that it was still current, it's still be OK to stream it up to the
1350+
* FlushPtr that was calculated before it became historic.
1351+
*/
13491352
boolbecameHistoric= false;
13501353

1354+
SendRqstPtr=GetStandbyFlushRecPtr();
1355+
13511356
if (!RecoveryInProgress())
13521357
{
13531358
/*
@@ -1361,7 +1366,8 @@ XLogSend(bool *caughtup)
13611366
{
13621367
/*
13631368
* Still a cascading standby. But is the timeline we're sending
1364-
* still the one recovery is recovering from?
1369+
* still the one recovery is recovering from? ThisTimeLineID was
1370+
* updated by the GetStandbyFlushRecPtr() call above.
13651371
*/
13661372
if (sendTimeLine!=ThisTimeLineID)
13671373
becameHistoric= true;
@@ -1391,8 +1397,24 @@ XLogSend(bool *caughtup)
13911397
(uint32)sentPtr);
13921398

13931399
sendTimeLineIsHistoric= true;
1400+
1401+
SendRqstPtr=sendTimeLineValidUpto;
13941402
}
13951403
}
1404+
else
1405+
{
1406+
/*
1407+
* Streaming the current timeline on a master.
1408+
*
1409+
* Attempt to send all data that's already been written out and
1410+
* fsync'd to disk. We cannot go further than what's been written out
1411+
* given the current implementation of XLogRead(). And in any case
1412+
* it's unsafe to send WAL that is not securely down to disk on the
1413+
* master: if the master subsequently crashes and restarts, slaves
1414+
* must not have applied any WAL that gets lost on the master.
1415+
*/
1416+
SendRqstPtr=GetFlushRecPtr();
1417+
}
13961418

13971419
/*
13981420
* If this is a historic timeline and we've reached the point where we
@@ -1413,15 +1435,7 @@ XLogSend(bool *caughtup)
14131435
return;
14141436
}
14151437

1416-
/*
1417-
* Stream up to the point known to be flushed to disk, or to the end of
1418-
* this timeline, whichever comes first.
1419-
*/
1420-
if (sendTimeLineIsHistoric&&XLByteLT(sendTimeLineValidUpto,FlushPtr))
1421-
SendRqstPtr=sendTimeLineValidUpto;
1422-
else
1423-
SendRqstPtr=FlushPtr;
1424-
1438+
/* Do we have any work to do? */
14251439
Assert(XLByteLE(sentPtr,SendRqstPtr));
14261440
if (XLByteLE(SendRqstPtr,sentPtr))
14271441
{
@@ -1522,15 +1536,11 @@ XLogSend(bool *caughtup)
15221536
* can be sent to the standby. This should only be called when in recovery,
15231537
* ie. we're streaming to a cascaded standby.
15241538
*
1525-
* If currentTLI is non-zero, the function returns the point that the WAL on
1526-
* the given timeline has been flushed upto. If recovery has already switched
1527-
* to a different timeline, InvalidXLogRecPtr is returned.
1528-
*
15291539
* As a side-effect, ThisTimeLineID is updated to the TLI of the last
15301540
* replayed WAL record.
15311541
*/
15321542
staticXLogRecPtr
1533-
GetStandbyFlushRecPtr(TimeLineIDcurrentTLI)
1543+
GetStandbyFlushRecPtr(void)
15341544
{
15351545
XLogRecPtrreplayPtr;
15361546
TimeLineIDreplayTLI;
@@ -1549,11 +1559,8 @@ GetStandbyFlushRecPtr(TimeLineID currentTLI)
15491559

15501560
ThisTimeLineID=replayTLI;
15511561

1552-
if (currentTLI!=replayTLI&&currentTLI!=0)
1553-
returnInvalidXLogRecPtr;
1554-
15551562
result=replayPtr;
1556-
if (receiveTLI==currentTLI&&receivePtr>replayPtr)
1563+
if (receiveTLI==ThisTimeLineID&&receivePtr>replayPtr)
15571564
result=receivePtr;
15581565

15591566
returnresult;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp