Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a7e8ece

Browse files
committed
Add -c/--restore-target-wal to pg_rewind
pg_rewind needs to copy from the source cluster to the target cluster a set of relation blocks changed from the previous checkpoint where WAL forked up to the end of WAL on the target. Building this list of relation blocks requires a range of WAL segments that may not be present anymore on the target's pg_wal, causing pg_rewind to fail. It is possible to work around this issue by copying manually the WAL segments needed but this may lead to some extra and actually useless work.

This commit introduces a new option allowing pg_rewind to use a restore_command while doing the rewind by grabbing the parameter value of restore_command from the target cluster configuration. This allows the rewind operation to be more reliable, so as only the WAL segments needed by the rewind are restored from the archives.

In order to be able to do that, a new routine is added to src/common/ to allow frontend tools to restore files from archives using an already-built restore command. This version is more simple than the backend equivalent as there is no need to handle the non-recovery case.

Author: Alexey Kondratov
Reviewed-by: Andrey Borodin, Andres Freund, Alvaro Herrera, Alexander Korotkov, Michael Paquier
Discussion: https://postgr.es/m/a3acff50-5a0d-9a2c-b3b2-ee36168955c1@postgrespro.ru
1 parent 92d3108 commit a7e8ece

File tree

12 files changed

+360
-27
lines changed

12 files changed

+360
-27
lines changed

‎doc/src/sgml/ref/pg_rewind.sgml

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ PostgreSQL documentation
6666
can be found either on the target timeline, the source timeline, or their common
6767
ancestor. In the typical failover scenario where the target cluster was
6868
shut down soon after the divergence, this is not a problem, but if the
69-
target cluster ran for a long time after the divergence, the old WAL
70-
files might no longer be present. In that case, they can be manually
71-
copied from the WAL archive to the <filename>pg_wal</filename> directory, or
72-
fetched on startup by configuring <xref linkend="guc-primary-conninfo"/> or
73-
<xref linkend="guc-restore-command"/>. The use of
69+
target cluster ran for a long time after the divergence, its old WAL
70+
files might no longer be present. In this case, you can manually copy them
71+
from the WAL archive to the <filename>pg_wal</filename> directory, or run
72+
<application>pg_rewind</application> with the <literal>-c</literal> option to
73+
automatically retrieve them from the WAL archive. The use of
7474
<application>pg_rewind</application> is not limited to failover, e.g. a standby
7575
server can be promoted, run some write transactions, and then rewinded
7676
to become a standby again.
@@ -232,6 +232,19 @@ PostgreSQL documentation
232232
</listitem>
233233
</varlistentry>
234234

235+
<varlistentry>
236+
<term><option>-c</option></term>
237+
<term><option>--restore-target-wal</option></term>
238+
<listitem>
239+
<para>
240+
Use <varname>restore_command</varname> defined in the target cluster
241+
configuration to retrieve WAL files from the WAL archive if these
242+
files are no longer available in the <filename>pg_wal</filename>
243+
directory.
244+
</para>
245+
</listitem>
246+
</varlistentry>
247+
235248
<varlistentry>
236249
<term><option>--debug</option></term>
237250
<listitem>
@@ -318,7 +331,10 @@ GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, b
318331
history forked off from the target cluster. For each WAL record,
319332
record each data block that was touched. This yields a list of all
320333
the data blocks that were changed in the target cluster, after the
321-
source cluster forked off.
334+
source cluster forked off. If some of the WAL files are no longer
335+
available, try re-running <application>pg_rewind</application> with
336+
the <option>-c</option> option to search for the missing files in
337+
the WAL archive.
322338
</para>
323339
</step>
324340
<step>

‎src/bin/pg_rewind/parsexlog.c

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include"catalog/pg_control.h"
2020
#include"catalog/storage_xlog.h"
2121
#include"commands/dbcommands_xlog.h"
22+
#include"common/fe_archive.h"
2223
#include"filemap.h"
2324
#include"pg_rewind.h"
2425

@@ -41,6 +42,7 @@ static char xlogfpath[MAXPGPATH];
4142

4243
typedefstructXLogPageReadPrivate
4344
{
45+
constchar*restoreCommand;
4446
inttliIndex;
4547
}XLogPageReadPrivate;
4648

@@ -55,14 +57,15 @@ static intSimpleXLogPageRead(XLogReaderState *xlogreader,
5557
*/
5658
void
5759
extractPageMap(constchar*datadir,XLogRecPtrstartpoint,inttliIndex,
58-
XLogRecPtrendpoint)
60+
XLogRecPtrendpoint,constchar*restoreCommand)
5961
{
6062
XLogRecord*record;
6163
XLogReaderState*xlogreader;
6264
char*errormsg;
6365
XLogPageReadPrivateprivate;
6466

6567
private.tliIndex=tliIndex;
68+
private.restoreCommand=restoreCommand;
6669
xlogreader=XLogReaderAllocate(WalSegSz,datadir,&SimpleXLogPageRead,
6770
&private);
6871
if (xlogreader==NULL)
@@ -146,7 +149,7 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex)
146149
void
147150
findLastCheckpoint(constchar*datadir,XLogRecPtrforkptr,inttliIndex,
148151
XLogRecPtr*lastchkptrec,TimeLineID*lastchkpttli,
149-
XLogRecPtr*lastchkptredo)
152+
XLogRecPtr*lastchkptredo,constchar*restoreCommand)
150153
{
151154
/* Walk backwards, starting from the given record */
152155
XLogRecord*record;
@@ -170,6 +173,7 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
170173
}
171174

172175
private.tliIndex=tliIndex;
176+
private.restoreCommand=restoreCommand;
173177
xlogreader=XLogReaderAllocate(WalSegSz,datadir,&SimpleXLogPageRead,
174178
&private);
175179
if (xlogreader==NULL)
@@ -281,8 +285,29 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
281285

282286
if (xlogreadfd<0)
283287
{
284-
pg_log_error("could not open file \"%s\": %m",xlogfpath);
285-
return-1;
288+
/*
289+
* If we have no restore_command to execute, then exit.
290+
*/
291+
if (private->restoreCommand==NULL)
292+
{
293+
pg_log_error("could not open file \"%s\": %m",xlogfpath);
294+
return-1;
295+
}
296+
297+
/*
298+
* Since we have restore_command, then try to retrieve missing WAL
299+
* file from the archive.
300+
*/
301+
xlogreadfd=RestoreArchivedFile(xlogreader->segcxt.ws_dir,
302+
xlogfname,
303+
WalSegSz,
304+
private->restoreCommand);
305+
306+
if (xlogreadfd<0)
307+
return-1;
308+
else
309+
pg_log_debug("using file \"%s\" restored from archive",
310+
xlogfpath);
286311
}
287312
}
288313

‎src/bin/pg_rewind/pg_rewind.c

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include"common/file_perm.h"
2323
#include"common/file_utils.h"
2424
#include"common/restricted_token.h"
25+
#include"common/string.h"
2526
#include"fe_utils/recovery_gen.h"
2627
#include"fetch.h"
2728
#include"file_ops.h"
@@ -38,6 +39,7 @@ static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
3839
staticvoiddigestControlFile(ControlFileData*ControlFile,char*source,
3940
size_tsize);
4041
staticvoidsyncTargetDirectory(void);
42+
staticvoidgetRestoreCommand(constchar*argv0);
4143
staticvoidsanityChecks(void);
4244
staticvoidfindCommonAncestorTimeline(XLogRecPtr*recptr,int*tliIndex);
4345
staticvoidensureCleanShutdown(constchar*argv0);
@@ -53,11 +55,13 @@ intWalSegSz;
5355
char*datadir_target=NULL;
5456
char*datadir_source=NULL;
5557
char*connstr_source=NULL;
58+
char*restore_command=NULL;
5659

5760
staticbooldebug= false;
5861
boolshowprogress= false;
5962
booldry_run= false;
6063
booldo_sync= true;
64+
boolrestore_wal= false;
6165

6266
/* Target history */
6367
TimeLineHistoryEntry*targetHistory;
@@ -74,6 +78,8 @@ usage(const char *progname)
7478
printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"),progname);
7579
printf(_("Usage:\n %s [OPTION]...\n\n"),progname);
7680
printf(_("Options:\n"));
81+
printf(_(" -c, --restore-target-wal use restore_command in target config\n"
82+
" to retrieve WAL files from archives\n"));
7783
printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
7884
printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
7985
printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
@@ -103,6 +109,7 @@ main(int argc, char **argv)
103109
{"source-server",required_argument,NULL,2},
104110
{"no-ensure-shutdown",no_argument,NULL,4},
105111
{"version",no_argument,NULL,'V'},
112+
{"restore-target-wal",no_argument,NULL,'c'},
106113
{"dry-run",no_argument,NULL,'n'},
107114
{"no-sync",no_argument,NULL,'N'},
108115
{"progress",no_argument,NULL,'P'},
@@ -144,14 +151,18 @@ main(int argc, char **argv)
144151
}
145152
}
146153

147-
while ((c=getopt_long(argc,argv,"D:nNPR",long_options,&option_index))!=-1)
154+
while ((c=getopt_long(argc,argv,"cD:nNPR",long_options,&option_index))!=-1)
148155
{
149156
switch (c)
150157
{
151158
case'?':
152159
fprintf(stderr,_("Try \"%s --help\" for more information.\n"),progname);
153160
exit(1);
154161

162+
case'c':
163+
restore_wal= true;
164+
break;
165+
155166
case'P':
156167
showprogress= true;
157168
break;
@@ -255,6 +266,8 @@ main(int argc, char **argv)
255266

256267
umask(pg_mode_mask);
257268

269+
getRestoreCommand(argv[0]);
270+
258271
atexit(disconnect_atexit);
259272

260273
/* Connect to remote server */
@@ -350,9 +363,8 @@ main(int argc, char **argv)
350363
exit(0);
351364
}
352365

353-
findLastCheckpoint(datadir_target,divergerec,
354-
lastcommontliIndex,
355-
&chkptrec,&chkpttli,&chkptredo);
366+
findLastCheckpoint(datadir_target,divergerec,lastcommontliIndex,
367+
&chkptrec,&chkpttli,&chkptredo,restore_command);
356368
pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
357369
(uint32) (chkptrec >>32), (uint32)chkptrec,
358370
chkpttli);
@@ -378,7 +390,7 @@ main(int argc, char **argv)
378390
if (showprogress)
379391
pg_log_info("reading WAL in target");
380392
extractPageMap(datadir_target,chkptrec,lastcommontliIndex,
381-
ControlFile_target.checkPoint);
393+
ControlFile_target.checkPoint,restore_command);
382394
filemap_finalize();
383395

384396
if (showprogress)
@@ -804,6 +816,71 @@ syncTargetDirectory(void)
804816
fsync_pgdata(datadir_target,PG_VERSION_NUM);
805817
}
806818

819+
/*
820+
* Get value of GUC parameter restore_command from the target cluster.
821+
*
822+
* This uses a logic based on "postgres -C" to get the value from the
823+
* cluster.
824+
*/
825+
staticvoid
826+
getRestoreCommand(constchar*argv0)
827+
{
828+
intrc;
829+
charpostgres_exec_path[MAXPGPATH],
830+
postgres_cmd[MAXPGPATH],
831+
cmd_output[MAXPGPATH];
832+
833+
if (!restore_wal)
834+
return;
835+
836+
/* find postgres executable */
837+
rc=find_other_exec(argv0,"postgres",
838+
PG_BACKEND_VERSIONSTR,
839+
postgres_exec_path);
840+
841+
if (rc<0)
842+
{
843+
charfull_path[MAXPGPATH];
844+
845+
if (find_my_exec(argv0,full_path)<0)
846+
strlcpy(full_path,progname,sizeof(full_path));
847+
848+
if (rc==-1)
849+
pg_log_error("The program \"postgres\" is needed by %s but was not found in the\n"
850+
"same directory as \"%s\".\n"
851+
"Check your installation.",
852+
progname,full_path);
853+
else
854+
pg_log_error("The program \"postgres\" was found by \"%s\"\n"
855+
"but was not the same version as %s.\n"
856+
"Check your installation.",
857+
full_path,progname);
858+
exit(1);
859+
}
860+
861+
/*
862+
* Build a command able to retrieve the value of GUC parameter
863+
* restore_command, if set.
864+
*/
865+
snprintf(postgres_cmd,sizeof(postgres_cmd),
866+
"\"%s\" -D \"%s\" -C restore_command",
867+
postgres_exec_path,datadir_target);
868+
869+
if (!pipe_read_line(postgres_cmd,cmd_output,sizeof(cmd_output)))
870+
exit(1);
871+
872+
(void)pg_strip_crlf(cmd_output);
873+
874+
if (strcmp(cmd_output,"")==0)
875+
pg_fatal("restore_command is not set on the target cluster");
876+
877+
restore_command=pg_strdup(cmd_output);
878+
879+
pg_log_debug("using for rewind restore_command = \'%s\'",
880+
restore_command);
881+
}
882+
883+
807884
/*
808885
* Ensure clean shutdown of target instance by launching single-user mode
809886
* postgres to do crash recovery.

‎src/bin/pg_rewind/pg_rewind.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@ extern uint64 fetch_done;
4242

4343
/* in parsexlog.c */
4444
externvoidextractPageMap(constchar*datadir,XLogRecPtrstartpoint,
45-
inttliIndex,XLogRecPtrendpoint);
45+
inttliIndex,XLogRecPtrendpoint,
46+
constchar*restoreCommand);
4647
externvoidfindLastCheckpoint(constchar*datadir,XLogRecPtrsearchptr,
4748
inttliIndex,
4849
XLogRecPtr*lastchkptrec,TimeLineID*lastchkpttli,
49-
XLogRecPtr*lastchkptredo);
50+
XLogRecPtr*lastchkptredo,
51+
constchar*restoreCommand);
5052
externXLogRecPtrreadOneRecord(constchar*datadir,XLogRecPtrptr,
5153
inttliIndex);
5254

‎src/bin/pg_rewind/t/001_basic.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use strict;
22
use warnings;
33
use TestLib;
4-
use Test::Moretests=>15;
4+
use Test::Moretests=>20;
55

66
use FindBin;
77
use lib$FindBin::RealBin;
@@ -171,5 +171,6 @@ sub run_test
171171
# Run the test in both modes
172172
run_test('local');
173173
run_test('remote');
174+
run_test('archive');
174175

175176
exit(0);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp