Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 009eeee

Browse files
committed
pg_rewind: Fix determining TLI when server was just promoted.
If the source server was just promoted, and it hasn't written the checkpoint record yet, pg_rewind considered the server to be still on the old timeline. Because of that, it would claim incorrectly that no rewind is required. Fix that by looking at minRecoveryPointTLI in the control file in addition to the ThisTimeLineID on the checkpoint.

This has been a known issue since forever, and we had worked around it in the regression tests by issuing a checkpoint after each promotion, before running pg_rewind. But that was always quite hacky, so better to fix this properly. This doesn't add any new tests for this, but removes the previously-added workarounds from the existing tests, so that they should occasionally hit this codepath again.

This is arguably a bug fix, but don't backpatch because we haven't really treated it as a bug so far. Also, the patch didn't apply cleanly to v13 and below. I'm sure it could be made to work on v13, but doesn't seem worth the risk and effort.

Reviewed-by: Kyotaro Horiguchi, Ibrar Ahmed, Aleksander Alekseev
Discussion: https://www.postgresql.org/message-id/9f568c97-87fe-a716-bd39-65299b8a60f4%40iki.fi
1 parent 75c7376 commit 009eeee

File tree

4 files changed

+64
-59
lines changed

4 files changed

+64
-59
lines changed

‎src/bin/pg_rewind/pg_rewind.c

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@ static void digestControlFile(ControlFileData *ControlFile,
4545
constchar*content,size_tsize);
4646
staticvoidgetRestoreCommand(constchar*argv0);
4747
staticvoidsanityChecks(void);
48-
staticvoidfindCommonAncestorTimeline(XLogRecPtr*recptr,int*tliIndex);
48+
staticTimeLineHistoryEntry*getTimelineHistory(TimeLineIDtli,boolis_source,
49+
int*nentries);
50+
staticvoidfindCommonAncestorTimeline(TimeLineHistoryEntry*a_history,
51+
inta_nentries,
52+
TimeLineHistoryEntry*b_history,
53+
intb_nentries,
54+
XLogRecPtr*recptr,int*tliIndex);
4955
staticvoidensureCleanShutdown(constchar*argv0);
5056
staticvoiddisconnect_atexit(void);
5157

@@ -134,6 +140,8 @@ main(int argc, char **argv)
134140
XLogRecPtrchkptrec;
135141
TimeLineIDchkpttli;
136142
XLogRecPtrchkptredo;
143+
TimeLineIDsource_tli;
144+
TimeLineIDtarget_tli;
137145
XLogRecPtrtarget_wal_endrec;
138146
size_tsize;
139147
char*buffer;
@@ -332,14 +340,28 @@ main(int argc, char **argv)
332340

333341
sanityChecks();
334342

343+
/*
344+
* Usually, the TLI can be found in the latest checkpoint record. But if
345+
* the source server is just being promoted (or it's a standby that's
346+
* following a primary that's just being promoted), and the checkpoint
347+
* requested by the promotion hasn't completed yet, the latest timeline is
348+
* in minRecoveryPoint. So we check which is later, the TLI of the
349+
* minRecoveryPoint or the latest checkpoint.
350+
*/
351+
source_tli=Max(ControlFile_source.minRecoveryPointTLI,
352+
ControlFile_source.checkPointCopy.ThisTimeLineID);
353+
354+
/* Similarly for the target. */
355+
target_tli=Max(ControlFile_target.minRecoveryPointTLI,
356+
ControlFile_target.checkPointCopy.ThisTimeLineID);
357+
335358
/*
336359
* Find the common ancestor timeline between the clusters.
337360
*
338361
* If both clusters are already on the same timeline, there's nothing to
339362
* do.
340363
*/
341-
if (ControlFile_target.checkPointCopy.ThisTimeLineID==
342-
ControlFile_source.checkPointCopy.ThisTimeLineID)
364+
if (target_tli==source_tli)
343365
{
344366
pg_log_info("source and target cluster are on the same timeline");
345367
rewind_needed= false;
@@ -348,12 +370,31 @@ main(int argc, char **argv)
348370
else
349371
{
350372
XLogRecPtrchkptendrec;
373+
TimeLineHistoryEntry*sourceHistory;
374+
intsourceNentries;
375+
376+
/*
377+
* Retrieve timelines for both source and target, and find the point
378+
* where they diverged.
379+
*/
380+
sourceHistory=getTimelineHistory(source_tli, true,&sourceNentries);
381+
targetHistory=getTimelineHistory(target_tli, false,&targetNentries);
382+
383+
findCommonAncestorTimeline(sourceHistory,sourceNentries,
384+
targetHistory,targetNentries,
385+
&divergerec,&lastcommontliIndex);
351386

352-
findCommonAncestorTimeline(&divergerec,&lastcommontliIndex);
353387
pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
354388
LSN_FORMAT_ARGS(divergerec),
355389
targetHistory[lastcommontliIndex].tli);
356390

391+
/*
392+
* Don't need the source history anymore. The target history is still
393+
* needed by the routines in parsexlog.c, when we read the target WAL.
394+
*/
395+
pfree(sourceHistory);
396+
397+
357398
/*
358399
* Determine the end-of-WAL on the target.
359400
*
@@ -654,7 +695,8 @@ perform_rewind(filemap_t *filemap, rewind_source *source,
654695
pg_fatal("source system was in unexpected state at end of rewind");
655696

656697
endrec=source->get_current_wal_insert_lsn(source);
657-
endtli=ControlFile_source_after.checkPointCopy.ThisTimeLineID;
698+
endtli=Max(ControlFile_source_after.checkPointCopy.ThisTimeLineID,
699+
ControlFile_source_after.minRecoveryPointTLI);
658700
}
659701
}
660702
else
@@ -796,16 +838,12 @@ MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
796838
}
797839

798840
/*
799-
* Retrieve timeline history for given control file which should behold
800-
* either source or target.
841+
* Retrieve timeline history for the source or target system.
801842
*/
802843
staticTimeLineHistoryEntry*
803-
getTimelineHistory(ControlFileData*controlFile,int*nentries)
844+
getTimelineHistory(TimeLineIDtli,boolis_source,int*nentries)
804845
{
805846
TimeLineHistoryEntry*history;
806-
TimeLineIDtli;
807-
808-
tli=controlFile->checkPointCopy.ThisTimeLineID;
809847

810848
/*
811849
* Timeline 1 does not have a history file, so there is no need to check
@@ -826,12 +864,10 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries)
826864
TLHistoryFilePath(path,tli);
827865

828866
/* Get history file from appropriate source */
829-
if (controlFile==&ControlFile_source)
867+
if (is_source)
830868
histfile=source->fetch_file(source,path,NULL);
831-
elseif (controlFile==&ControlFile_target)
832-
histfile=slurpFile(datadir_target,path,NULL);
833869
else
834-
pg_fatal("invalid control file");
870+
histfile=slurpFile(datadir_target,path,NULL);
835871

836872
history=rewind_parseTimeLineHistory(histfile,tli,nentries);
837873
pg_free(histfile);
@@ -841,12 +877,10 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries)
841877
{
842878
inti;
843879

844-
if (controlFile==&ControlFile_source)
880+
if (is_source)
845881
pg_log_debug("Source timeline history:");
846-
elseif (controlFile==&ControlFile_target)
847-
pg_log_debug("Target timeline history:");
848882
else
849-
Assert(false);
883+
pg_log_debug("Target timeline history:");
850884

851885
/*
852886
* Print the target timeline history.
@@ -866,28 +900,19 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries)
866900
}
867901

868902
/*
869-
* Determine the TLI of the last common timeline in the timeline history of the
870-
* two clusters. targetHistory is filled with target timeline history and
871-
* targetNentries is number of items in targetHistory. *tliIndex is set to the
872-
* index of last common timeline in targetHistory array, and *recptr is set to
873-
* the position where the timeline history diverged (ie. the first WAL record
874-
* that's not the same in both clusters).
875-
*
876-
* Control files of both clusters must be read into ControlFile_target/source
877-
* before calling this routine.
903+
* Determine the TLI of the last common timeline in the timeline history of
904+
* two clusters. *tliIndex is set to the index of last common timeline in
905+
* the arrays, and *recptr is set to the position where the timeline history
906+
* diverged (ie. the first WAL record that's not the same in both clusters).
878907
*/
879908
staticvoid
880-
findCommonAncestorTimeline(XLogRecPtr*recptr,int*tliIndex)
909+
findCommonAncestorTimeline(TimeLineHistoryEntry*a_history,inta_nentries,
910+
TimeLineHistoryEntry*b_history,intb_nentries,
911+
XLogRecPtr*recptr,int*tliIndex)
881912
{
882-
TimeLineHistoryEntry*sourceHistory;
883-
intsourceNentries;
884913
inti,
885914
n;
886915

887-
/* Retrieve timelines for both source and target */
888-
sourceHistory=getTimelineHistory(&ControlFile_source,&sourceNentries);
889-
targetHistory=getTimelineHistory(&ControlFile_target,&targetNentries);
890-
891916
/*
892917
* Trace the history forward, until we hit the timeline diverge. It may
893918
* still be possible that the source and target nodes used the same
@@ -896,21 +921,19 @@ findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
896921
* recovery processes. Hence check the start position of the new timeline
897922
* as well and move down by one extra timeline entry if they do not match.
898923
*/
899-
n=Min(sourceNentries,targetNentries);
924+
n=Min(a_nentries,b_nentries);
900925
for (i=0;i<n;i++)
901926
{
902-
if (sourceHistory[i].tli!=targetHistory[i].tli||
903-
sourceHistory[i].begin!=targetHistory[i].begin)
927+
if (a_history[i].tli!=b_history[i].tli||
928+
a_history[i].begin!=b_history[i].begin)
904929
break;
905930
}
906931

907932
if (i>0)
908933
{
909934
i--;
910-
*recptr=MinXLogRecPtr(sourceHistory[i].end,targetHistory[i].end);
935+
*recptr=MinXLogRecPtr(a_history[i].end,b_history[i].end);
911936
*tliIndex=i;
912-
913-
pg_free(sourceHistory);
914937
return;
915938
}
916939
else

‎src/bin/pg_rewind/t/007_standby_source.pl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@
8383
# A (primary) <--- B (standby) C (primary)
8484

8585
$node_c->promote;
86-
$node_c->safe_psql('postgres',"checkpoint");
8786

8887

8988
# Insert a row in A. This causes A/B and C to have "diverged", so that it's

‎src/bin/pg_rewind/t/008_min_recovery_point.pl

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,6 @@
7676
#
7777
$node_1->stop('fast');
7878
$node_3->promote;
79-
# Force a checkpoint after the promotion. pg_rewind looks at the control
80-
# file to determine what timeline the server is on, and that isn't updated
81-
# immediately at promotion, but only at the next checkpoint. When running
82-
# pg_rewind in remote mode, it's possible that we complete the test steps
83-
# after promotion so quickly that when pg_rewind runs, the standby has not
84-
# performed a checkpoint after promotion yet.
85-
$node_3->safe_psql('postgres',"checkpoint");
8679

8780
# reconfigure node_1 as a standby following node_3
8881
my$node_3_connstr =$node_3->connstr;
@@ -108,8 +101,6 @@
108101
$node_3->wait_for_catchup('node_1');
109102

110103
$node_1->promote;
111-
# Force a checkpoint after promotion, like earlier.
112-
$node_1->safe_psql('postgres',"checkpoint");
113104

114105
#
115106
# We now have a split-brain with two primaries. Insert a row on both to

‎src/bin/pg_rewind/t/RewindTest.pm

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,6 @@ sub promote_standby
198198
# the primary out-of-sync with the standby.
199199
$node_standby->promote;
200200

201-
# Force a checkpoint after the promotion. pg_rewind looks at the control
202-
# file to determine what timeline the server is on, and that isn't updated
203-
# immediately at promotion, but only at the next checkpoint. When running
204-
# pg_rewind in remote mode, it's possible that we complete the test steps
205-
# after promotion so quickly that when pg_rewind runs, the standby has not
206-
# performed a checkpoint after promotion yet.
207-
standby_psql("checkpoint");
208-
209201
return;
210202
}
211203

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp