@@ -192,7 +192,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
192192{
193193/* try to setup multi-timeline backup chain */
194194elog (WARNING ,"Valid backup on current timeline %u is not found, "
195- "try to look up on previous timelines" ,
195+ "trying to look up on previous timelines" ,
196196current .tli );
197197
198198tli_list = catalog_get_timelines (& instance_config );
@@ -333,7 +333,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
333333
334334/* list files with the logical path. omit $PGDATA */
335335dir_list_file (backup_files_list ,instance_config .pgdata ,
336- true, true, false,0 ,FIO_DB_HOST );
336+ true, true, false,true, 0 ,FIO_DB_HOST );
337337
338338/*
339339 * Get database_map (name to oid) for use in partial restore feature.
@@ -350,7 +350,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
350350/* External dirs numeration starts with 1.
351351 * 0 value is not external dir */
352352dir_list_file (backup_files_list ,parray_get (external_dirs ,i ),
353- false, true, false,i + 1 ,FIO_DB_HOST );
353+ false, true, false,true, i + 1 ,FIO_DB_HOST );
354354
355355/* close ssh session in main thread */
356356fio_disconnect ();
@@ -401,10 +401,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
401401
402402if (current .backup_mode != BACKUP_MODE_FULL )
403403{
404- elog (LOG ,"current_tli: %X" ,current .tli );
405- elog (LOG ,"prev_backup-> start_lsn: %X/%X" ,
404+ elog (LOG ,"Current tli: %X" ,current .tli );
405+ elog (LOG ,"Parent start_lsn: %X/%X" ,
406406 (uint32 ) (prev_backup -> start_lsn >>32 ), (uint32 ) (prev_backup -> start_lsn ));
407- elog (LOG ,"current. start_lsn: %X/%X" ,
407+ elog (LOG ,"start_lsn: %X/%X" ,
408408 (uint32 ) (current .start_lsn >>32 ), (uint32 ) (current .start_lsn ));
409409}
410410
@@ -436,10 +436,11 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
436436/*
437437 * Build the page map from ptrack information.
438438 */
439- if (nodeInfo -> ptrack_version_num = =20 )
439+ if (nodeInfo -> ptrack_version_num > =20 )
440440make_pagemap_from_ptrack_2 (backup_files_list ,backup_conn ,
441- nodeInfo -> ptrack_schema ,
442- prev_backup_start_lsn );
441+ nodeInfo -> ptrack_schema ,
442+ nodeInfo -> ptrack_version_num ,
443+ prev_backup_start_lsn );
443444else if (nodeInfo -> ptrack_version_num == 15 ||
444445nodeInfo -> ptrack_version_num == 16 ||
445446nodeInfo -> ptrack_version_num == 17 )
@@ -582,9 +583,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
582583/* Notify end of backup */
583584pg_stop_backup (& current ,pg_startbackup_conn ,nodeInfo );
584585
585- elog (LOG ,"current.stop_lsn: %X/%X" ,
586- (uint32 ) (stop_backup_lsn >>32 ), (uint32 ) (stop_backup_lsn ));
587-
588586/* In case of backup from replica >= 9.6 we must fix minRecPoint,
589587 * First we must find pg_control in backup_files_list.
590588 */
@@ -626,7 +624,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
626624/* Scan backup PG_XLOG_DIR */
627625xlog_files_list = parray_new ();
628626join_path_components (pg_xlog_path ,database_path ,PG_XLOG_DIR );
629- dir_list_file (xlog_files_list ,pg_xlog_path , false, true, false,0 ,
627+ dir_list_file (xlog_files_list ,pg_xlog_path , false, true, false,true, 0 ,
630628FIO_BACKUP_HOST );
631629
632630/* TODO: Drop streamed WAL segments greater than stop_lsn */
@@ -884,15 +882,10 @@ do_backup(time_t start_time, bool no_validate,
884882#endif
885883
886884get_ptrack_version (backup_conn ,& nodeInfo );
887- //elog(WARNING, "ptrack_version_num %d", ptrack_version_num);
885+ //elog(WARNING, "ptrack_version_num %d", ptrack_version_num);
888886
889887if (nodeInfo .ptrack_version_num > 0 )
890- {
891- if (nodeInfo .ptrack_version_num >=20 )
892- nodeInfo .is_ptrack_enable = pg_ptrack_enable2 (backup_conn );
893- else
894- nodeInfo .is_ptrack_enable = pg_ptrack_enable (backup_conn );
895- }
888+ nodeInfo .is_ptrack_enable = pg_ptrack_enable (backup_conn ,nodeInfo .ptrack_version_num );
896889
897890if (current .backup_mode == BACKUP_MODE_DIFF_PTRACK )
898891{
@@ -1746,65 +1739,66 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17461739/* Calculate LSN */
17471740stop_backup_lsn_tmp = ((uint64 )lsn_hi ) <<32 |lsn_lo ;
17481741
1742+ /* It is ok for replica to return invalid STOP LSN
1743+ * UPD: Apparently it is ok even for a master.
1744+ */
17491745if (!XRecOffIsValid (stop_backup_lsn_tmp ))
17501746{
1751- /* It is ok for replica to return STOP LSN with NullXRecOff
1752- * UPD: Apparently it is ok even for master.
1753- */
1754- if (XRecOffIsNull (stop_backup_lsn_tmp ))
1755- {
1756- char * xlog_path ,
1757- stream_xlog_path [MAXPGPATH ];
1758- XLogSegNo segno = 0 ;
1759- XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1747+ char * xlog_path ,
1748+ stream_xlog_path [MAXPGPATH ];
1749+ XLogSegNo segno = 0 ;
1750+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
17601751
1761- /*
1762- * Even though the value is invalid, it's expected postgres behaviour
1763- * and we're trying to fix it below.
1764- */
1765- elog (LOG ,"Null offset instop_backup_lsn value %X/%X, trying to fix" ,
1766- (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
1752+ /*
1753+ * Even though the value is invalid, it's expected postgres behaviour
1754+ * and we're trying to fix it below.
1755+ */
1756+ elog (LOG ,"Invalid offset instop_lsn value %X/%X, trying to fix" ,
1757+ (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
17671758
1768- /*
1769- * Note: even with gdb it is very hard to produce automated tests for
1770- * contrecord +NullXRecOff , so emulate it for manual testing.
1771- */
1772- //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1773- //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1774- // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1759+ /*
1760+ * Note: even with gdb it is very hard to produce automated tests for
1761+ * contrecord + invalid LSN, so emulate it for manual testing.
1762+ */
1763+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1764+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1765+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
17751766
1776- if (stream_wal )
1777- {
1778- pgBackupGetPath2 (backup ,stream_xlog_path ,
1779- lengthof (stream_xlog_path ),
1780- DATABASE_DIR ,PG_XLOG_DIR );
1781- xlog_path = stream_xlog_path ;
1782- }
1783- else
1784- xlog_path = arclog_path ;
1767+ if (stream_wal )
1768+ {
1769+ pgBackupGetPath2 (backup ,stream_xlog_path ,
1770+ lengthof (stream_xlog_path ),
1771+ DATABASE_DIR ,PG_XLOG_DIR );
1772+ xlog_path = stream_xlog_path ;
1773+ }
1774+ else
1775+ xlog_path = arclog_path ;
17851776
1786- GetXLogSegNo (stop_backup_lsn_tmp ,segno ,instance_config .xlog_seg_size );
1777+ GetXLogSegNo (stop_backup_lsn_tmp ,segno ,instance_config .xlog_seg_size );
17871778
1788- /*
1789- * Note, that there is no guarantee that corresponding WAL file even exists.
1790- * Replica may return LSN from future and keep staying in present.
1791- * Or it can returnLSN with NullXRecOff .
1792- *
1793- * That's bad, since we want to get real LSN to save it in backup label file
1794- * and to use it in WAL validation.
1795- *
1796- * So we try to do the following:
1797- * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1798- * look for the first valid record in it.
1799- * It solves the problem of occasional invalidXRecOff on write-busy system.
1800- * 2. Failing that, look for record in previous segment with endpoint
1801- * equal or greater than stop_lsn. It may(!) solve the problem ofNullXRecOff
1802- * on write-idle system. If that fails too, error out.
1803- */
1779+ /*
1780+ * Note, that there is no guarantee that corresponding WAL file even exists.
1781+ * Replica may return LSN from future and keep staying in present.
1782+ * Or it can return an invalid LSN.
1783+ *
1784+ * That's bad, since we want to get real LSN to save it in backup label file
1785+ * and to use it in WAL validation.
1786+ *
1787+ * So we try to do the following:
1788+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1789+ * look for the first valid record in it.
1790+ * It solves the problem of occasional invalid LSN on write-busy system.
1791+ * 2. Failing that, look for record in previous segment with endpoint
1792+ * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN
1793+ * on write-idle system. If that fails too, error out.
1794+ */
18041795
1796+ /* stop_lsn is pointing to a 0 byte of xlog segment */
1797+ if (stop_backup_lsn_tmp %instance_config .xlog_seg_size == 0 )
1798+ {
18051799/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
18061800wait_wal_lsn (stop_backup_lsn_tmp , false,backup -> tli ,
1807- false, true,WARNING ,stream_wal );
1801+ false, true,WARNING ,stream_wal );
18081802
18091803/* Get the first record in segment with current stop_lsn */
18101804lsn_tmp = get_first_record_lsn (xlog_path ,segno ,backup -> tli ,
@@ -1840,17 +1834,39 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18401834(uint32 ) (stop_backup_lsn_tmp >>32 ),
18411835(uint32 ) (stop_backup_lsn_tmp ));
18421836}
1837+ }
1838+ /* stop lsn is aligned to xlog block size, just find next lsn */
1839+ else if (stop_backup_lsn_tmp %XLOG_BLCKSZ == 0 )
1840+ {
1841+ /* Wait for segment with current stop_lsn */
1842+ wait_wal_lsn (stop_backup_lsn_tmp , false,backup -> tli ,
1843+ false, true,ERROR ,stream_wal );
1844+
1845+ /* Get the next closest record in segment with current stop_lsn */
1846+ lsn_tmp = get_next_record_lsn (xlog_path ,segno ,backup -> tli ,
1847+ instance_config .xlog_seg_size ,
1848+ instance_config .archive_timeout ,
1849+ stop_backup_lsn_tmp );
18431850
1844- /* Setting stop_backup_lsn will set stop point for streaming */
1845- stop_backup_lsn = lsn_tmp ;
1846- stop_lsn_exists = true;
1851+ /* sanity */
1852+ if (!XRecOffIsValid (lsn_tmp )|| XLogRecPtrIsInvalid (lsn_tmp ))
1853+ elog (ERROR ,"Failed to get WAL record next to %X/%X" ,
1854+ (uint32 ) (stop_backup_lsn_tmp >>32 ),
1855+ (uint32 ) (stop_backup_lsn_tmp ));
18471856}
18481857/* PostgreSQL returned something very illegal as STOP_LSN, error out */
18491858else
18501859elog (ERROR ,"Invalid stop_backup_lsn value %X/%X" ,
18511860 (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
1861+
1862+ /* Setting stop_backup_lsn will set stop point for streaming */
1863+ stop_backup_lsn = lsn_tmp ;
1864+ stop_lsn_exists = true;
18521865}
18531866
1867+ elog (LOG ,"stop_lsn: %X/%X" ,
1868+ (uint32 ) (stop_backup_lsn >>32 ), (uint32 ) (stop_backup_lsn ));
1869+
18541870/* Write backup_label and tablespace_map */
18551871if (!exclusive_backup )
18561872{