@@ -401,10 +401,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
401401
402402if (current .backup_mode != BACKUP_MODE_FULL )
403403{
404- elog (LOG ,"current_tli: %X" ,current .tli );
405- elog (LOG ,"prev_backup-> start_lsn: %X/%X" ,
404+ elog (LOG ,"Current tli: %X" ,current .tli );
405+ elog (LOG ,"Parent start_lsn: %X/%X" ,
406406 (uint32 ) (prev_backup -> start_lsn >>32 ), (uint32 ) (prev_backup -> start_lsn ));
407- elog (LOG ,"current. start_lsn: %X/%X" ,
407+ elog (LOG ,"start_lsn: %X/%X" ,
408408 (uint32 ) (current .start_lsn >>32 ), (uint32 ) (current .start_lsn ));
409409}
410410
@@ -583,9 +583,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
583583/* Notify end of backup */
584584pg_stop_backup (& current ,pg_startbackup_conn ,nodeInfo );
585585
586- elog (LOG ,"current.stop_lsn: %X/%X" ,
587- (uint32 ) (stop_backup_lsn >>32 ), (uint32 ) (stop_backup_lsn ));
588-
589586/* In case of backup from replica >= 9.6 we must fix minRecPoint,
590587 * First we must find pg_control in backup_files_list.
591588 */
@@ -1742,65 +1739,66 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17421739/* Calculate LSN */
17431740stop_backup_lsn_tmp = ((uint64 )lsn_hi ) <<32 |lsn_lo ;
17441741
1742+ /* It is ok for replica to return invalid STOP LSN
1743+ * UPD: Apparently it is ok even for a master.
1744+ */
17451745if (!XRecOffIsValid (stop_backup_lsn_tmp ))
17461746{
1747- /* It is ok for replica to return STOP LSN with NullXRecOff
1748- * UPD: Apparently it is ok even for master.
1749- */
1750- if (XRecOffIsNull (stop_backup_lsn_tmp ))
1751- {
1752- char * xlog_path ,
1753- stream_xlog_path [MAXPGPATH ];
1754- XLogSegNo segno = 0 ;
1755- XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1747+ char * xlog_path ,
1748+ stream_xlog_path [MAXPGPATH ];
1749+ XLogSegNo segno = 0 ;
1750+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
17561751
1757- /*
1758- * Even though the value is invalid, it's expected postgres behaviour
1759- * and we're trying to fix it below.
1760- */
1761- elog (LOG ,"Null offset instop_backup_lsn value %X/%X, trying to fix" ,
1762- (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
1752+ /*
1753+ * Even though the value is invalid, it's expected postgres behaviour
1754+ * and we're trying to fix it below.
1755+ */
1756+ elog (LOG ,"Invalid offset instop_lsn value %X/%X, trying to fix" ,
1757+ (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
17631758
1764- /*
1765- * Note: even with gdb it is very hard to produce automated tests for
1766- * contrecord +NullXRecOff , so emulate it for manual testing.
1767- */
1768- //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1769- //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1770- // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1759+ /*
1760+ * Note: even with gdb it is very hard to produce automated tests for
1761+ * contrecord + invalid LSN, so emulate it for manual testing.
1762+ */
1763+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1764+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1765+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
17711766
1772- if (stream_wal )
1773- {
1774- pgBackupGetPath2 (backup ,stream_xlog_path ,
1775- lengthof (stream_xlog_path ),
1776- DATABASE_DIR ,PG_XLOG_DIR );
1777- xlog_path = stream_xlog_path ;
1778- }
1779- else
1780- xlog_path = arclog_path ;
1767+ if (stream_wal )
1768+ {
1769+ pgBackupGetPath2 (backup ,stream_xlog_path ,
1770+ lengthof (stream_xlog_path ),
1771+ DATABASE_DIR ,PG_XLOG_DIR );
1772+ xlog_path = stream_xlog_path ;
1773+ }
1774+ else
1775+ xlog_path = arclog_path ;
17811776
1782- GetXLogSegNo (stop_backup_lsn_tmp ,segno ,instance_config .xlog_seg_size );
1777+ GetXLogSegNo (stop_backup_lsn_tmp ,segno ,instance_config .xlog_seg_size );
17831778
1784- /*
1785- * Note, that there is no guarantee that corresponding WAL file even exists.
1786- * Replica may return LSN from future and keep staying in present.
1787- * Or it can returnLSN with NullXRecOff .
1788- *
1789- * That's bad, since we want to get real LSN to save it in backup label file
1790- * and to use it in WAL validation.
1791- *
1792- * So we try to do the following:
1793- * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1794- * look for the first valid record in it.
1795- * It solves the problem of occasional invalidXRecOff on write-busy system.
1796- * 2. Failing that, look for record in previous segment with endpoint
1797- * equal or greater than stop_lsn. It may(!) solve the problem ofNullXRecOff
1798- * on write-idle system. If that fails too, error out.
1799- */
1779+ /*
1780+ * Note, that there is no guarantee that corresponding WAL file even exists.
1781+ * Replica may return LSN from future and keep staying in present.
1782+ * Or it can return an invalid LSN.
1783+ *
1784+ * That's bad, since we want to get real LSN to save it in backup label file
1785+ * and to use it in WAL validation.
1786+ *
1787+ * So we try to do the following:
1788+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1789+ * look for the first valid record in it.
1790+ * It solves the problem of occasional invalid LSN on write-busy system.
1791+ * 2. Failing that, look for record in previous segment with endpoint
1792+ * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN
1793+ * on write-idle system. If that fails too, error out.
1794+ */
18001795
1796+ /* stop_lsn is pointing to a 0 byte of xlog segment */
1797+ if (stop_backup_lsn_tmp %instance_config .xlog_seg_size == 0 )
1798+ {
18011799/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
18021800wait_wal_lsn (stop_backup_lsn_tmp , false,backup -> tli ,
1803- false, true,WARNING ,stream_wal );
1801+ false, true,WARNING ,stream_wal );
18041802
18051803/* Get the first record in segment with current stop_lsn */
18061804lsn_tmp = get_first_record_lsn (xlog_path ,segno ,backup -> tli ,
@@ -1836,17 +1834,39 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18361834(uint32 ) (stop_backup_lsn_tmp >>32 ),
18371835(uint32 ) (stop_backup_lsn_tmp ));
18381836}
1837+ }
1838+ /* stop lsn is aligned to xlog block size, just find next lsn */
1839+ else if (stop_backup_lsn_tmp %XLOG_BLCKSZ == 0 )
1840+ {
1841+ /* Wait for segment with current stop_lsn */
1842+ wait_wal_lsn (stop_backup_lsn_tmp , false,backup -> tli ,
1843+ false, true,ERROR ,stream_wal );
1844+
1845+ /* Get the next closest record in segment with current stop_lsn */
1846+ lsn_tmp = get_next_record_lsn (xlog_path ,segno ,backup -> tli ,
1847+ instance_config .xlog_seg_size ,
1848+ instance_config .archive_timeout ,
1849+ stop_backup_lsn_tmp );
18391850
1840- /* Setting stop_backup_lsn will set stop point for streaming */
1841- stop_backup_lsn = lsn_tmp ;
1842- stop_lsn_exists = true;
1851+ /* sanity */
1852+ if (!XRecOffIsValid (lsn_tmp )|| XLogRecPtrIsInvalid (lsn_tmp ))
1853+ elog (ERROR ,"Failed to get WAL record next to %X/%X" ,
1854+ (uint32 ) (stop_backup_lsn_tmp >>32 ),
1855+ (uint32 ) (stop_backup_lsn_tmp ));
18431856}
18441857/* PostgreSQL returned something very illegal as STOP_LSN, error out */
18451858else
18461859elog (ERROR ,"Invalid stop_backup_lsn value %X/%X" ,
18471860 (uint32 ) (stop_backup_lsn_tmp >>32 ), (uint32 ) (stop_backup_lsn_tmp ));
1861+
1862+ /* Setting stop_backup_lsn will set stop point for streaming */
1863+ stop_backup_lsn = lsn_tmp ;
1864+ stop_lsn_exists = true;
18481865}
18491866
1867+ elog (LOG ,"stop_lsn: %X/%X" ,
1868+ (uint32 ) (stop_backup_lsn >>32 ), (uint32 ) (stop_backup_lsn ));
1869+
18501870/* Write backup_label and tablespace_map */
18511871if (!exclusive_backup )
18521872{