Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit866237a

Browse files
committed
Advance old-segment horizon properly after slot invalidation
When some slots are invalidated due to the max_slot_wal_keep_size limit,the old segment horizon should move forward to stay within the limit.However, in commitc655077 we forgot to call KeepLogSeg again torecompute the horizon after invalidating replication slots. In caseswhere other slots remained, the limits would be recomputed eventuallyfor other reasons, but if all slots were invalidated, the limits wouldnot move at all afterwards. Repair.Backpatch to 13 where the feature was introduced.Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>Reported-by: Marcin Krupowicz <mk@071.ovh>Discussion:https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
1 parent91a1651 commit866237a

File tree

4 files changed

+74
-12
lines changed

4 files changed

+74
-12
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9056,7 +9056,15 @@ CreateCheckPoint(int flags)
90569056
*/
90579057
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
90589058
KeepLogSeg(recptr,&_logSegNo);
9059-
InvalidateObsoleteReplicationSlots(_logSegNo);
9059+
if (InvalidateObsoleteReplicationSlots(_logSegNo))
9060+
{
9061+
/*
9062+
* Some slots have been invalidated; recalculate the old-segment
9063+
* horizon, starting again from RedoRecPtr.
9064+
*/
9065+
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
9066+
KeepLogSeg(recptr,&_logSegNo);
9067+
}
90609068
_logSegNo--;
90619069
RemoveOldXlogFiles(_logSegNo,RedoRecPtr,recptr);
90629070

@@ -9391,7 +9399,15 @@ CreateRestartPoint(int flags)
93919399
replayPtr=GetXLogReplayRecPtr(&replayTLI);
93929400
endptr= (receivePtr<replayPtr) ?replayPtr :receivePtr;
93939401
KeepLogSeg(endptr,&_logSegNo);
9394-
InvalidateObsoleteReplicationSlots(_logSegNo);
9402+
if (InvalidateObsoleteReplicationSlots(_logSegNo))
9403+
{
9404+
/*
9405+
* Some slots have been invalidated; recalculate the old-segment
9406+
* horizon, starting again from RedoRecPtr.
9407+
*/
9408+
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
9409+
KeepLogSeg(endptr,&_logSegNo);
9410+
}
93959411
_logSegNo--;
93969412

93979413
/*
@@ -9559,6 +9575,12 @@ GetWALAvailability(XLogRecPtr targetLSN)
95599575
* requirement of replication slots. For the latter criterion we do consider
95609576
* the effects of max_slot_wal_keep_size: reserve at most that much space back
95619577
* from recptr.
9578+
*
9579+
* Note about replication slots: if this function calculates a value
9580+
* that's further ahead than what slots need reserved, then affected
9581+
* slots need to be invalidated and this function invoked again.
9582+
* XXX it might be a good idea to rewrite this function so that
9583+
* invalidation is optionally done here, instead.
95629584
*/
95639585
staticvoid
95649586
KeepLogSeg(XLogRecPtrrecptr,XLogSegNo*logSegNo)

‎src/backend/replication/slot.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,11 +1130,14 @@ ReplicationSlotReserveWal(void)
11301130
* Returns whether ReplicationSlotControlLock was released in the interim (and
11311131
* in that case we're not holding the lock at return, otherwise we are).
11321132
*
1133+
* Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1134+
*
11331135
* This is inherently racy, because we release the LWLock
11341136
* for syscalls, so caller must restart if we return true.
11351137
*/
11361138
staticbool
1137-
InvalidatePossiblyObsoleteSlot(ReplicationSlot*s,XLogRecPtroldestLSN)
1139+
InvalidatePossiblyObsoleteSlot(ReplicationSlot*s,XLogRecPtroldestLSN,
1140+
bool*invalidated)
11381141
{
11391142
intlast_signaled_pid=0;
11401143
boolreleased_lock= false;
@@ -1191,6 +1194,9 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN)
11911194
s->active_pid=MyProcPid;
11921195
s->data.invalidated_at=restart_lsn;
11931196
s->data.restart_lsn=InvalidXLogRecPtr;
1197+
1198+
/* Let caller know */
1199+
*invalidated= true;
11941200
}
11951201

11961202
SpinLockRelease(&s->mutex);
@@ -1279,12 +1285,15 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN)
12791285
* Mark any slot that points to an LSN older than the given segment
12801286
* as invalid; it requires WAL that's about to be removed.
12811287
*
1288+
* Returns true when any slot have got invalidated.
1289+
*
12821290
* NB - this runs as part of checkpoint, so avoid raising errors if possible.
12831291
*/
1284-
void
1292+
bool
12851293
InvalidateObsoleteReplicationSlots(XLogSegNooldestSegno)
12861294
{
12871295
XLogRecPtroldestLSN;
1296+
boolinvalidated= false;
12881297

12891298
XLogSegNoOffsetToRecPtr(oldestSegno,0,wal_segment_size,oldestLSN);
12901299

@@ -1297,13 +1306,24 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
12971306
if (!s->in_use)
12981307
continue;
12991308

1300-
if (InvalidatePossiblyObsoleteSlot(s,oldestLSN))
1309+
if (InvalidatePossiblyObsoleteSlot(s,oldestLSN,&invalidated))
13011310
{
13021311
/* if the lock was released, start from scratch */
13031312
gotorestart;
13041313
}
13051314
}
13061315
LWLockRelease(ReplicationSlotControlLock);
1316+
1317+
/*
1318+
* If any slots have been invalidated, recalculate the resource limits.
1319+
*/
1320+
if (invalidated)
1321+
{
1322+
ReplicationSlotsComputeRequiredXmin(false);
1323+
ReplicationSlotsComputeRequiredLSN();
1324+
}
1325+
1326+
returninvalidated;
13071327
}
13081328

13091329
/*

‎src/include/replication/slot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ extern void ReplicationSlotsComputeRequiredLSN(void);
209209
externXLogRecPtrReplicationSlotsComputeLogicalRestartLSN(void);
210210
externboolReplicationSlotsCountDBSlots(Oiddboid,int*nslots,int*nactive);
211211
externvoidReplicationSlotsDropDBSlots(Oiddboid);
212-
externvoidInvalidateObsoleteReplicationSlots(XLogSegNooldestSegno);
212+
externboolInvalidateObsoleteReplicationSlots(XLogSegNooldestSegno);
213213

214214
externvoidStartupReplicationSlots(void);
215215
externvoidCheckPointReplicationSlots(void);

‎src/test/recovery/t/019_replslot_limit.pl

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use PostgresNode;
99

1010
use File::Pathqw(rmtree);
11-
use Test::Moretests=>14;
11+
use Test::Moretests=>15;
1212
use Time::HiResqw(usleep);
1313

1414
$ENV{PGDATABASE} ='postgres';
@@ -173,7 +173,12 @@
173173
# Advance WAL again, the slot loses the oldest segment.
174174
my$logstart = get_log_size($node_master);
175175
advance_wal($node_master, 7);
176-
$node_master->safe_psql('postgres',"CHECKPOINT;");
176+
177+
# This slot should be broken, wait for that to happen
178+
$node_master->poll_query_until(
179+
'postgres',
180+
qq[SELECT wal_status = 'lost' FROM pg_replication_slots
181+
WHERE slot_name = 'rep1']);
177182

178183
# WARNING should be issued
179184
ok( find_in_log(
@@ -182,13 +187,28 @@
182187
$logstart),
183188
'check that the warning is logged');
184189

185-
# This slot should be broken
186-
$result =$node_master->safe_psql('postgres',
187-
"SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size FROM pg_replication_slots WHERE slot_name = 'rep1'"
188-
);
190+
$result =$node_master->safe_psql(
191+
'postgres',
192+
qq[
193+
SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size
194+
FROM pg_replication_slots WHERE slot_name = 'rep1']);
189195
is($result,"rep1|f|t|lost|",
190196
'check that the slot became inactive and the state "lost" persists');
191197

198+
# The invalidated slot shouldn't keep the old-segment horizon back;
199+
# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
200+
# Test for this by creating a new slot and comparing its restart LSN
201+
# to the oldest existing file.
202+
my$redoseg =$node_master->safe_psql('postgres',
203+
"SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)"
204+
);
205+
my$oldestseg =$node_master->safe_psql('postgres',
206+
"SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1"
207+
);
208+
$node_master->safe_psql('postgres',
209+
qq[SELECT pg_drop_replication_slot('s2')]);
210+
is($oldestseg,$redoseg,"check that segments have been removed");
211+
192212
# The standby no longer can connect to the master
193213
$logstart = get_log_size($node_standby);
194214
$node_standby->start;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp