Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f64b11f

Browse files
committed
Fix an assertion failure related to an exclusive backup.
Previously multiple sessions could execute pg_start_backup() and pg_stop_backup() to start and stop an exclusive backup at the same time. This could trigger the assertion failure of "FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)". This happened because, even while pg_start_backup() was starting an exclusive backup, another session could run pg_stop_backup() concurrently and mark the backup as not-in-progress unconditionally.

This patch introduces ExclusiveBackupState indicating the state of an exclusive backup. This state is used to ensure that there is only one session running pg_start_backup() or pg_stop_backup() at the same time, to avoid the assertion failure.

Back-patch to all supported versions.

Author: Michael Paquier
Reviewed-By: Kyotaro Horiguchi and me
Reported-By: Andreas Seltenreich
Discussion: <87mvktojme.fsf@credativ.de>
1 parent 47d32a4 commit f64b11f

File tree

1 file changed

+147
-62
lines changed
  • src/backend/access/transam

1 file changed

+147
-62
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 147 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,29 @@ typedef struct XLogwrtResult
348348
XLogRecPtrFlush;/* last byte + 1 flushed */
349349
}XLogwrtResult;
350350

351+
/*
352+
* State of an exclusive backup, necessary to control concurrent activities
353+
* across sessions when working on exclusive backups.
354+
*
355+
* EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually
356+
* running, to be more precise pg_start_backup() is not being executed for
357+
* an exclusive backup and there is no exclusive backup in progress.
358+
* EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an
359+
* exclusive backup.
360+
* EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished
361+
* running and an exclusive backup is in progress. pg_stop_backup() is
362+
* needed to finish it.
363+
* EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an
364+
* exclusive backup.
365+
*/
366+
typedefenumExclusiveBackupState
367+
{
368+
EXCLUSIVE_BACKUP_NONE=0,
369+
EXCLUSIVE_BACKUP_STARTING,
370+
EXCLUSIVE_BACKUP_IN_PROGRESS,
371+
EXCLUSIVE_BACKUP_STOPPING
372+
}ExclusiveBackupState;
373+
351374
/*
352375
* Shared state data for XLogInsert.
353376
*/
@@ -370,13 +393,15 @@ typedef struct XLogCtlInsert
370393
boolfullPageWrites;
371394

372395
/*
373-
* exclusiveBackup is true if a backup started with pg_start_backup() is
374-
* in progress, and nonExclusiveBackups is a counter indicating the number
375-
* of streaming base backups currently in progress. forcePageWrites is set
376-
* to true when either of these is non-zero. lastBackupStart is the latest
377-
* checkpoint redo location used as a starting point for an online backup.
396+
* exclusiveBackupState indicates the state of an exclusive backup
397+
* (see comments of ExclusiveBackupState for more details).
398+
* nonExclusiveBackups is a counter indicating the number of streaming
399+
* base backups currently in progress. forcePageWrites is set to true
400+
* when either of these is non-zero. lastBackupStart is the latest
401+
* checkpoint redo location used as a starting point for an online
402+
* backup.
378403
*/
379-
boolexclusiveBackup;
404+
ExclusiveBackupStateexclusiveBackupState;
380405
intnonExclusiveBackups;
381406
XLogRecPtrlastBackupStart;
382407
}XLogCtlInsert;
@@ -693,6 +718,7 @@ static bool CheckForStandbyTrigger(void);
693718
staticvoidxlog_outrec(StringInfobuf,XLogRecord*record);
694719
#endif
695720
staticvoidpg_start_backup_callback(intcode,Datumarg);
721+
staticvoidpg_stop_backup_callback(intcode,Datumarg);
696722
staticboolread_backup_label(XLogRecPtr*checkPointLoc,
697723
bool*backupEndRequired,bool*backupFromStandby);
698724
staticvoidrm_redo_error_callback(void*arg);
@@ -8699,15 +8725,20 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
86998725
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
87008726
if (exclusive)
87018727
{
8702-
if (XLogCtl->Insert.exclusiveBackup)
8728+
/*
8729+
* At first, mark that we're now starting an exclusive backup,
8730+
* to ensure that there are no other sessions currently running
8731+
* pg_start_backup() or pg_stop_backup().
8732+
*/
8733+
if (XLogCtl->Insert.exclusiveBackupState!=EXCLUSIVE_BACKUP_NONE)
87038734
{
87048735
LWLockRelease(WALInsertLock);
87058736
ereport(ERROR,
87068737
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
87078738
errmsg("a backup is already in progress"),
87088739
errhint("Run pg_stop_backup() and try again.")));
87098740
}
8710-
XLogCtl->Insert.exclusiveBackup=true;
8741+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_STARTING;
87118742
}
87128743
else
87138744
XLogCtl->Insert.nonExclusiveBackups++;
@@ -8867,7 +8898,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
88678898
{
88688899
/*
88698900
* Check for existing backup label --- implies a backup is already
8870-
* running. (XXX given that we checkedexclusiveBackup above,
8901+
* running. (XXX given that we checkedexclusiveBackupState above,
88718902
* maybe it would be OK to just unlink any such label file?)
88728903
*/
88738904
if (stat(BACKUP_LABEL_FILE,&stat_buf)!=0)
@@ -8908,6 +8939,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
89088939
}
89098940
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum)BoolGetDatum(exclusive));
89108941

8942+
/*
8943+
* Mark that start phase has correctly finished for an exclusive backup.
8944+
*/
8945+
if (exclusive)
8946+
{
8947+
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
8948+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_IN_PROGRESS;
8949+
LWLockRelease(WALInsertLock);
8950+
}
8951+
89118952
/*
89128953
* We're done. As a convenience, return the starting WAL location.
89138954
*/
@@ -8926,23 +8967,41 @@ pg_start_backup_callback(int code, Datum arg)
89268967
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
89278968
if (exclusive)
89288969
{
8929-
Assert(XLogCtl->Insert.exclusiveBackup);
8930-
XLogCtl->Insert.exclusiveBackup=false;
8970+
Assert(XLogCtl->Insert.exclusiveBackupState==EXCLUSIVE_BACKUP_STARTING);
8971+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_NONE;
89318972
}
89328973
else
89338974
{
89348975
Assert(XLogCtl->Insert.nonExclusiveBackups>0);
89358976
XLogCtl->Insert.nonExclusiveBackups--;
89368977
}
89378978

8938-
if (!XLogCtl->Insert.exclusiveBackup&&
8979+
if (XLogCtl->Insert.exclusiveBackupState==EXCLUSIVE_BACKUP_NONE&&
89398980
XLogCtl->Insert.nonExclusiveBackups==0)
89408981
{
89418982
XLogCtl->Insert.forcePageWrites= false;
89428983
}
89438984
LWLockRelease(WALInsertLock);
89448985
}
89458986

8987+
/*
8988+
* Error cleanup callback for pg_stop_backup
8989+
*/
8990+
staticvoid
8991+
pg_stop_backup_callback(intcode,Datumarg)
8992+
{
8993+
boolexclusive=DatumGetBool(arg);
8994+
8995+
/* Update backup status on failure */
8996+
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
8997+
if (exclusive)
8998+
{
8999+
Assert(XLogCtl->Insert.exclusiveBackupState==EXCLUSIVE_BACKUP_STOPPING);
9000+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_IN_PROGRESS;
9001+
}
9002+
LWLockRelease(WALInsertLock);
9003+
}
9004+
89469005
/*
89479006
* do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup()
89489007
* function.
@@ -9006,12 +9065,85 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
90069065
errmsg("WAL level not sufficient for making an online backup"),
90079066
errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
90089067

9068+
if (exclusive)
9069+
{
9070+
/*
9071+
* At first, mark that we're now stopping an exclusive backup,
9072+
* to ensure that there are no other sessions currently running
9073+
* pg_start_backup() or pg_stop_backup().
9074+
*/
9075+
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
9076+
if (XLogCtl->Insert.exclusiveBackupState!=EXCLUSIVE_BACKUP_IN_PROGRESS)
9077+
{
9078+
LWLockRelease(WALInsertLock);
9079+
ereport(ERROR,
9080+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9081+
errmsg("exclusive backup not in progress")));
9082+
}
9083+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_STOPPING;
9084+
LWLockRelease(WALInsertLock);
9085+
9086+
/*
9087+
* Remove backup_label. In case of failure, the state for an exclusive
9088+
* backup is switched back to in-progress.
9089+
*/
9090+
PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum)BoolGetDatum(exclusive));
9091+
{
9092+
/*
9093+
* Read the existing label file into memory.
9094+
*/
9095+
structstatstatbuf;
9096+
intr;
9097+
9098+
if (stat(BACKUP_LABEL_FILE,&statbuf))
9099+
{
9100+
if (errno!=ENOENT)
9101+
ereport(ERROR,
9102+
(errcode_for_file_access(),
9103+
errmsg("could not stat file \"%s\": %m",
9104+
BACKUP_LABEL_FILE)));
9105+
ereport(ERROR,
9106+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9107+
errmsg("a backup is not in progress")));
9108+
}
9109+
9110+
lfp=AllocateFile(BACKUP_LABEL_FILE,"r");
9111+
if (!lfp)
9112+
{
9113+
ereport(ERROR,
9114+
(errcode_for_file_access(),
9115+
errmsg("could not read file \"%s\": %m",
9116+
BACKUP_LABEL_FILE)));
9117+
}
9118+
labelfile=palloc(statbuf.st_size+1);
9119+
r=fread(labelfile,statbuf.st_size,1,lfp);
9120+
labelfile[statbuf.st_size]='\0';
9121+
9122+
/*
9123+
* Close and remove the backup label file
9124+
*/
9125+
if (r!=1||ferror(lfp)||FreeFile(lfp))
9126+
ereport(ERROR,
9127+
(errcode_for_file_access(),
9128+
errmsg("could not read file \"%s\": %m",
9129+
BACKUP_LABEL_FILE)));
9130+
if (unlink(BACKUP_LABEL_FILE)!=0)
9131+
ereport(ERROR,
9132+
(errcode_for_file_access(),
9133+
errmsg("could not remove file \"%s\": %m",
9134+
BACKUP_LABEL_FILE)));
9135+
}
9136+
PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum)BoolGetDatum(exclusive));
9137+
}
9138+
90099139
/*
90109140
* OK to update backup counters and forcePageWrites
90119141
*/
90129142
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
90139143
if (exclusive)
9014-
XLogCtl->Insert.exclusiveBackup= false;
9144+
{
9145+
XLogCtl->Insert.exclusiveBackupState=EXCLUSIVE_BACKUP_NONE;
9146+
}
90159147
else
90169148
{
90179149
/*
@@ -9024,60 +9156,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
90249156
XLogCtl->Insert.nonExclusiveBackups--;
90259157
}
90269158

9027-
if (!XLogCtl->Insert.exclusiveBackup&&
9159+
if (XLogCtl->Insert.exclusiveBackupState==EXCLUSIVE_BACKUP_NONE&&
90289160
XLogCtl->Insert.nonExclusiveBackups==0)
90299161
{
90309162
XLogCtl->Insert.forcePageWrites= false;
90319163
}
90329164
LWLockRelease(WALInsertLock);
90339165

9034-
if (exclusive)
9035-
{
9036-
/*
9037-
* Read the existing label file into memory.
9038-
*/
9039-
structstatstatbuf;
9040-
intr;
9041-
9042-
if (stat(BACKUP_LABEL_FILE,&statbuf))
9043-
{
9044-
if (errno!=ENOENT)
9045-
ereport(ERROR,
9046-
(errcode_for_file_access(),
9047-
errmsg("could not stat file \"%s\": %m",
9048-
BACKUP_LABEL_FILE)));
9049-
ereport(ERROR,
9050-
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9051-
errmsg("a backup is not in progress")));
9052-
}
9053-
9054-
lfp=AllocateFile(BACKUP_LABEL_FILE,"r");
9055-
if (!lfp)
9056-
{
9057-
ereport(ERROR,
9058-
(errcode_for_file_access(),
9059-
errmsg("could not read file \"%s\": %m",
9060-
BACKUP_LABEL_FILE)));
9061-
}
9062-
labelfile=palloc(statbuf.st_size+1);
9063-
r=fread(labelfile,statbuf.st_size,1,lfp);
9064-
labelfile[statbuf.st_size]='\0';
9065-
9066-
/*
9067-
* Close and remove the backup label file
9068-
*/
9069-
if (r!=1||ferror(lfp)||FreeFile(lfp))
9070-
ereport(ERROR,
9071-
(errcode_for_file_access(),
9072-
errmsg("could not read file \"%s\": %m",
9073-
BACKUP_LABEL_FILE)));
9074-
if (unlink(BACKUP_LABEL_FILE)!=0)
9075-
ereport(ERROR,
9076-
(errcode_for_file_access(),
9077-
errmsg("could not remove file \"%s\": %m",
9078-
BACKUP_LABEL_FILE)));
9079-
}
9080-
90819166
/*
90829167
* Read and parse the START WAL LOCATION line (this code is pretty crude,
90839168
* but we are not expecting any variability in the file format).
@@ -9318,7 +9403,7 @@ do_pg_abort_backup(void)
93189403
Assert(XLogCtl->Insert.nonExclusiveBackups>0);
93199404
XLogCtl->Insert.nonExclusiveBackups--;
93209405

9321-
if (!XLogCtl->Insert.exclusiveBackup&&
9406+
if (XLogCtl->Insert.exclusiveBackupState==EXCLUSIVE_BACKUP_NONE&&
93229407
XLogCtl->Insert.nonExclusiveBackups==0)
93239408
{
93249409
XLogCtl->Insert.forcePageWrites= false;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp