@@ -55,11 +55,10 @@ static void base_backup_cleanup(int code, Datum arg);
5555static void perform_base_backup (basebackup_options * opt ,DIR * tblspcdir );
5656static void parse_basebackup_options (List * options ,basebackup_options * opt );
5757static void SendXlogRecPtrResult (XLogRecPtr ptr );
58+ static int compareWalFileNames (const void * a ,const void * b );
5859
5960/*
6061 * Size of each block sent into the tar stream for larger files.
61- *
62- * XLogSegSize *MUST* be evenly dividable by this
6362 */
6463#define TAR_SEND_SIZE 32768
6564
@@ -221,68 +220,208 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
221220 * We've left the last tar file "open", so we can now append the
222221 * required WAL files to it.
223222 */
223+ char pathbuf [MAXPGPATH ];
224224uint32 logid ,
225225logseg ;
226+ uint32 startlogid ,
227+ startlogseg ;
226228uint32 endlogid ,
227229endlogseg ;
228230struct stat statbuf ;
231+ List * historyFileList = NIL ;
232+ List * walFileList = NIL ;
233+ char * * walFiles ;
234+ int nWalFiles ;
235+ char firstoff [MAXFNAMELEN ];
236+ char lastoff [MAXFNAMELEN ];
237+ DIR * dir ;
238+ struct dirent * de ;
239+ int i ;
240+ ListCell * lc ;
241+ TimeLineID tli ;
229242
230- MemSet (& statbuf ,0 ,sizeof (statbuf ));
231- statbuf .st_mode = S_IRUSR |S_IWUSR ;
232- #ifndef WIN32
233- statbuf .st_uid = geteuid ();
234- statbuf .st_gid = getegid ();
235- #endif
236- statbuf .st_size = XLogSegSize ;
237- statbuf .st_mtime = time (NULL );
238-
239- XLByteToSeg (startptr ,logid ,logseg );
243+ /*
244+ * I'd rather not worry about timelines here, so scan pg_xlog and
245+ * include all WAL files in the range between 'startptr' and 'endptr',
246+ * regardless of the timeline the file is stamped with. If there are
247+ * some spurious WAL files belonging to timelines that don't belong
248+ * in this server's history, they will be included too. Normally there
249+ * shouldn't be such files, but if there are, there's little harm in
250+ * including them.
251+ */
252+ XLByteToSeg (startptr ,startlogid ,startlogseg );
253+ XLogFileName (firstoff ,ThisTimeLineID ,startlogid ,startlogseg );
240254XLByteToPrevSeg (endptr ,endlogid ,endlogseg );
255+ XLogFileName (lastoff ,ThisTimeLineID ,endlogid ,endlogseg );
241256
242- while (true)
257+ dir = AllocateDir ("pg_xlog" );
258+ if (!dir )
259+ ereport (ERROR ,
260+ (errmsg ("could not open directory \"%s\": %m" ,"pg_xlog" )));
261+ while ((de = ReadDir (dir ,"pg_xlog" ))!= NULL )
243262{
244- /* Send another xlog segment */
245- char fn [MAXPGPATH ];
246- int i ;
263+ /* Does it look like a WAL segment, and is it in the range? */
264+ if (strlen (de -> d_name )== 24 &&
265+ strspn (de -> d_name ,"0123456789ABCDEF" )== 24 &&
266+ strcmp (de -> d_name + 8 ,firstoff + 8 ) >=0 &&
267+ strcmp (de -> d_name + 8 ,lastoff + 8 ) <=0 )
268+ {
269+ walFileList = lappend (walFileList ,pstrdup (de -> d_name ));
270+ }
271+ /* Does it look like a timeline history file? */
272+ else if (strlen (de -> d_name )== 8 + strlen (".history" )&&
273+ strspn (de -> d_name ,"0123456789ABCDEF" )== 8 &&
274+ strcmp (de -> d_name + 8 ,".history" )== 0 )
275+ {
276+ historyFileList = lappend (historyFileList ,pstrdup (de -> d_name ));
277+ }
278+ }
279+ FreeDir (dir );
247280
248- XLogFilePath (fn ,ThisTimeLineID ,logid ,logseg );
249- _tarWriteHeader (fn ,NULL ,& statbuf );
281+ /*
282+ * Before we go any further, check that none of the WAL segments we
283+ * need were removed.
284+ */
285+ CheckXLogRemoved (startlogid ,startlogseg ,ThisTimeLineID );
286+
287+ /*
288+ * Put the WAL filenames into an array, and sort. We send the files
289+ * in order from oldest to newest, to reduce the chance that a file
290+ * is recycled before we get a chance to send it over.
291+ */
292+ nWalFiles = list_length (walFileList );
293+ walFiles = palloc (nWalFiles * sizeof (char * ));
294+ i = 0 ;
295+ foreach (lc ,walFileList )
296+ {
297+ walFiles [i ++ ]= lfirst (lc );
298+ }
299+ qsort (walFiles ,nWalFiles ,sizeof (char * ),compareWalFileNames );
250300
251- /* Send the actual WAL file contents, block-by-block */
252- for (i = 0 ;i < XLogSegSize /TAR_SEND_SIZE ;i ++ )
301+ /*
302+ * Sanity check: the first and last segment should cover startptr and
303+ * endptr, with no gaps in between.
304+ */
305+ XLogFromFileName (walFiles [0 ],& tli ,& logid ,& logseg );
306+ if (logid != startlogid || logseg != startlogseg )
307+ {
308+ char startfname [MAXFNAMELEN ];
309+ XLogFileName (startfname ,ThisTimeLineID ,startlogid ,startlogseg );
310+ ereport (ERROR ,
311+ (errmsg ("could not find WAL file %s" ,startfname )));
312+ }
313+ for (i = 0 ;i < nWalFiles ;i ++ )
314+ {
315+ int currlogid = logid ,
316+ currlogseg = logseg ;
317+ int nextlogid = logid ,
318+ nextlogseg = logseg ;
319+ NextLogSeg (nextlogid ,nextlogseg );
320+
321+ XLogFromFileName (walFiles [i ],& tli ,& logid ,& logseg );
322+ if (!((nextlogid == logid && nextlogseg == logseg )||
323+ (currlogid == logid && currlogseg == logseg )))
253324{
254- char buf [TAR_SEND_SIZE ];
255- XLogRecPtr ptr ;
325+ char nextfname [MAXFNAMELEN ];
326+ XLogFileName (nextfname ,ThisTimeLineID ,nextlogid ,nextlogseg );
327+ ereport (ERROR ,
328+ (errmsg ("could not find WAL file %s" ,nextfname )));
329+ }
330+ }
331+ if (logid != endlogid || logseg != endlogseg )
332+ {
333+ char endfname [MAXFNAMELEN ];
334+ XLogFileName (endfname ,ThisTimeLineID ,endlogid ,endlogseg );
335+ ereport (ERROR ,
336+ (errmsg ("could not find WAL file %s" ,endfname )));
337+ }
338+
339+ /* Ok, we have everything we need. Send the WAL files. */
340+ for (i = 0 ;i < nWalFiles ;i ++ )
341+ {
342+ FILE * fp ;
343+ char buf [TAR_SEND_SIZE ];
344+ size_t cnt ;
345+ pgoff_t len = 0 ;
256346
257- ptr . xlogid = logid ;
258- ptr . xrecoff = logseg * XLogSegSize + TAR_SEND_SIZE * i ;
347+ snprintf ( pathbuf , MAXPGPATH , XLOGDIR "/%s" , walFiles [ i ]) ;
348+ XLogFromFileName ( walFiles [ i ], & tli , & logid , & logseg ) ;
259349
350+ fp = AllocateFile (pathbuf ,"rb" );
351+ if (fp == NULL )
352+ {
260353/*
261- * Some old compilers, e.g. gcc 2.95.3/x86, think that passing
262- * a struct in the same function as a longjump might clobber a
263- * variable. bjm 2011-02-04
264- * http://lists.apple.com/archives/xcode-users/2003/Dec//msg000
265- * 51.html
354+ * Most likely reason for this is that the file was already
355+ * removed by a checkpoint, so check for that to get a better
356+ * error message.
266357 */
267- XLogRead (buf ,ptr ,TAR_SEND_SIZE );
268- if (pq_putmessage ('d' ,buf ,TAR_SEND_SIZE ))
358+ CheckXLogRemoved (logid ,logseg ,tli );
359+
360+ ereport (ERROR ,
361+ (errcode_for_file_access (),
362+ errmsg ("could not open file \"%s\": %m" ,pathbuf )));
363+ }
364+
365+ if (fstat (fileno (fp ),& statbuf )!= 0 )
366+ ereport (ERROR ,
367+ (errcode_for_file_access (),
368+ errmsg ("could not stat file \"%s\": %m" ,
369+ pathbuf )));
370+ if (statbuf .st_size != XLogSegSize )
371+ {
372+ CheckXLogRemoved (logid ,logseg ,tli );
373+ ereport (ERROR ,
374+ (errcode_for_file_access (),
375+ errmsg ("unexpected WAL file size \"%s\"" ,walFiles [i ])));
376+ }
377+
378+ _tarWriteHeader (pathbuf ,NULL ,& statbuf );
379+
380+ while ((cnt = fread (buf ,1 ,Min (sizeof (buf ),XLogSegSize - len ),fp ))> 0 )
381+ {
382+ CheckXLogRemoved (logid ,logseg ,tli );
383+ /* Send the chunk as a CopyData message */
384+ if (pq_putmessage ('d' ,buf ,cnt ))
269385ereport (ERROR ,
270386(errmsg ("base backup could not send data, aborting backup" )));
387+
388+ len += cnt ;
389+ if (len == XLogSegSize )
390+ break ;
271391}
272392
273- /*
274- * Files are always fixed size, and always end on a 512 byte
275- * boundary, so padding is never necessary.
276- */
393+ if (len != XLogSegSize )
394+ {
395+ CheckXLogRemoved (logid ,logseg ,tli );
396+ ereport (ERROR ,
397+ (errcode_for_file_access (),
398+ errmsg ("unexpected WAL file size \"%s\"" ,walFiles [i ])));
399+ }
277400
401+ /* XLogSegSize is a multiple of 512, so no need for padding */
402+ FreeFile (fp );
403+ }
404+
405+ /*
406+ * Send timeline history files too. Only the latest timeline history
407+ * file is required for recovery, and even that only if there happens
408+ * to be a timeline switch in the first WAL segment that contains the
409+ * checkpoint record, or if we're taking a base backup from a standby
410+ * server and the target timeline changes while the backup is taken.
411+ * But they are small and highly useful for debugging purposes, so
412+ * better include them all, always.
413+ */
414+ foreach (lc ,historyFileList )
415+ {
416+ char * fname = lfirst (lc );
417+ snprintf (pathbuf ,MAXPGPATH ,XLOGDIR "/%s" ,fname );
278418
279- /* Advance to the next WAL file */
280- NextLogSeg (logid ,logseg );
419+ if (lstat (pathbuf ,& statbuf )!= 0 )
420+ ereport (ERROR ,
421+ (errcode_for_file_access (),
422+ errmsg ("could not stat file \"%s\": %m" ,pathbuf )));
281423
282- /* Have we reached our stop position yet? */
283- if (logid > endlogid ||
284- (logid == endlogid && logseg > endlogseg ))
285- break ;
424+ sendFile (pathbuf ,pathbuf ,& statbuf , false);
286425}
287426
288427/* Send CopyDone message for the last tar file */
@@ -291,6 +430,19 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
291430SendXlogRecPtrResult (endptr );
292431}
293432
/*
 * qsort comparison function for WAL segment file names.
 *
 * 'a' and 'b' each point to one "char *" element of the array being sorted.
 * The first 8 characters of each name (the timeline portion) are skipped and
 * the remaining log/seg portion is compared with strcmp(), so two names that
 * differ only in their timeline compare as equal.
 *
 * Returns a negative value, zero, or a positive value, as strcmp() does.
 *
 * Both names must be at least 8 characters long; that is not checked here.
 */
static int
compareWalFileNames(const void *a, const void *b)
{
	/* Keep the const qualifier of the arguments instead of casting it away. */
	const char *fna = *(const char *const *) a;
	const char *fnb = *(const char *const *) b;

	return strcmp(fna + 8, fnb + 8);
}
294446/*
295447 * Parse the base backup options passed down by the parser
296448 */