Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite5bcbb1

Browse files
committed
Advance old-segment horizon properly after slot invalidation
When some slots are invalidated due to the max_slot_wal_keep_size limit,the old segment horizon should move forward to stay within the limit.However, in commitc655077 we forgot to call KeepLogSeg again torecompute the horizon after invalidating replication slots. In caseswhere other slots remained, the limits would be recomputed eventuallyfor other reasons, but if all slots were invalidated, the limits wouldnot move at all afterwards. Repair.Backpatch to 13 where the feature was introduced.Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>Reported-by: Marcin Krupowicz <mk@071.ovh>Discussion:https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
1 parent525115a commite5bcbb1

File tree

4 files changed

+74
-12
lines changed

4 files changed

+74
-12
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9262,7 +9262,15 @@ CreateCheckPoint(int flags)
92629262
*/
92639263
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
92649264
KeepLogSeg(recptr,&_logSegNo);
9265-
InvalidateObsoleteReplicationSlots(_logSegNo);
9265+
if (InvalidateObsoleteReplicationSlots(_logSegNo))
9266+
{
9267+
/*
9268+
* Some slots have been invalidated; recalculate the old-segment
9269+
* horizon, starting again from RedoRecPtr.
9270+
*/
9271+
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
9272+
KeepLogSeg(recptr,&_logSegNo);
9273+
}
92669274
_logSegNo--;
92679275
RemoveOldXlogFiles(_logSegNo,RedoRecPtr,recptr);
92689276

@@ -9602,7 +9610,15 @@ CreateRestartPoint(int flags)
96029610
replayPtr=GetXLogReplayRecPtr(&replayTLI);
96039611
endptr= (receivePtr<replayPtr) ?replayPtr :receivePtr;
96049612
KeepLogSeg(endptr,&_logSegNo);
9605-
InvalidateObsoleteReplicationSlots(_logSegNo);
9613+
if (InvalidateObsoleteReplicationSlots(_logSegNo))
9614+
{
9615+
/*
9616+
* Some slots have been invalidated; recalculate the old-segment
9617+
* horizon, starting again from RedoRecPtr.
9618+
*/
9619+
XLByteToSeg(RedoRecPtr,_logSegNo,wal_segment_size);
9620+
KeepLogSeg(endptr,&_logSegNo);
9621+
}
96069622
_logSegNo--;
96079623

96089624
/*
@@ -9771,6 +9787,12 @@ GetWALAvailability(XLogRecPtr targetLSN)
97719787
* requirement of replication slots. For the latter criterion we do consider
97729788
* the effects of max_slot_wal_keep_size: reserve at most that much space back
97739789
* from recptr.
9790+
*
9791+
* Note about replication slots: if this function calculates a value
9792+
* that's further ahead than what slots need reserved, then affected
9793+
* slots need to be invalidated and this function invoked again.
9794+
* XXX it might be a good idea to rewrite this function so that
9795+
* invalidation is optionally done here, instead.
97749796
*/
97759797
staticvoid
97769798
KeepLogSeg(XLogRecPtrrecptr,XLogSegNo*logSegNo)

‎src/backend/replication/slot.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,11 +1142,14 @@ ReplicationSlotReserveWal(void)
11421142
* Returns whether ReplicationSlotControlLock was released in the interim (and
11431143
* in that case we're not holding the lock at return, otherwise we are).
11441144
*
1145+
* Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1146+
*
11451147
* This is inherently racy, because we release the LWLock
11461148
* for syscalls, so caller must restart if we return true.
11471149
*/
11481150
staticbool
1149-
InvalidatePossiblyObsoleteSlot(ReplicationSlot*s,XLogRecPtroldestLSN)
1151+
InvalidatePossiblyObsoleteSlot(ReplicationSlot*s,XLogRecPtroldestLSN,
1152+
bool*invalidated)
11501153
{
11511154
intlast_signaled_pid=0;
11521155
boolreleased_lock= false;
@@ -1203,6 +1206,9 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN)
12031206
s->active_pid=MyProcPid;
12041207
s->data.invalidated_at=restart_lsn;
12051208
s->data.restart_lsn=InvalidXLogRecPtr;
1209+
1210+
/* Let caller know */
1211+
*invalidated= true;
12061212
}
12071213

12081214
SpinLockRelease(&s->mutex);
@@ -1290,12 +1296,15 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN)
12901296
* Mark any slot that points to an LSN older than the given segment
12911297
* as invalid; it requires WAL that's about to be removed.
12921298
*
1299+
* Returns true when any slot have got invalidated.
1300+
*
12931301
* NB - this runs as part of checkpoint, so avoid raising errors if possible.
12941302
*/
1295-
void
1303+
bool
12961304
InvalidateObsoleteReplicationSlots(XLogSegNooldestSegno)
12971305
{
12981306
XLogRecPtroldestLSN;
1307+
boolinvalidated= false;
12991308

13001309
XLogSegNoOffsetToRecPtr(oldestSegno,0,wal_segment_size,oldestLSN);
13011310

@@ -1308,13 +1317,24 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
13081317
if (!s->in_use)
13091318
continue;
13101319

1311-
if (InvalidatePossiblyObsoleteSlot(s,oldestLSN))
1320+
if (InvalidatePossiblyObsoleteSlot(s,oldestLSN,&invalidated))
13121321
{
13131322
/* if the lock was released, start from scratch */
13141323
gotorestart;
13151324
}
13161325
}
13171326
LWLockRelease(ReplicationSlotControlLock);
1327+
1328+
/*
1329+
* If any slots have been invalidated, recalculate the resource limits.
1330+
*/
1331+
if (invalidated)
1332+
{
1333+
ReplicationSlotsComputeRequiredXmin(false);
1334+
ReplicationSlotsComputeRequiredLSN();
1335+
}
1336+
1337+
returninvalidated;
13181338
}
13191339

13201340
/*

‎src/include/replication/slot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ extern void ReplicationSlotsComputeRequiredLSN(void);
214214
externXLogRecPtrReplicationSlotsComputeLogicalRestartLSN(void);
215215
externboolReplicationSlotsCountDBSlots(Oiddboid,int*nslots,int*nactive);
216216
externvoidReplicationSlotsDropDBSlots(Oiddboid);
217-
externvoidInvalidateObsoleteReplicationSlots(XLogSegNooldestSegno);
217+
externboolInvalidateObsoleteReplicationSlots(XLogSegNooldestSegno);
218218
externReplicationSlot*SearchNamedReplicationSlot(constchar*name,boolneed_lock);
219219
externvoidReplicationSlotNameForTablesync(Oidsuboid,Oidrelid,char*syncslotname,intszslot);
220220
externvoidReplicationSlotDropAtPubNode(WalReceiverConn*wrconn,char*slotname,boolmissing_ok);

‎src/test/recovery/t/019_replslot_limit.pl

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
use PostgresNode;
1212

1313
use File::Pathqw(rmtree);
14-
use Test::Moretests=>$TestLib::windows_os ?14 :18;
14+
use Test::Moretests=>$TestLib::windows_os ?15 :19;
1515
use Time::HiResqw(usleep);
1616

1717
$ENV{PGDATABASE} ='postgres';
@@ -176,7 +176,12 @@
176176
# Advance WAL again, the slot loses the oldest segment.
177177
my$logstart = get_log_size($node_primary);
178178
advance_wal($node_primary, 7);
179-
$node_primary->safe_psql('postgres',"CHECKPOINT;");
179+
180+
# This slot should be broken, wait for that to happen
181+
$node_primary->poll_query_until(
182+
'postgres',
183+
qq[SELECT wal_status = 'lost' FROM pg_replication_slots
184+
WHERE slot_name = 'rep1']);
180185

181186
# WARNING should be issued
182187
ok( find_in_log(
@@ -185,13 +190,28 @@
185190
$logstart),
186191
'check that the warning is logged');
187192

188-
# This slot should be broken
189-
$result =$node_primary->safe_psql('postgres',
190-
"SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size FROM pg_replication_slots WHERE slot_name = 'rep1'"
191-
);
193+
$result =$node_primary->safe_psql(
194+
'postgres',
195+
qq[
196+
SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size
197+
FROM pg_replication_slots WHERE slot_name = 'rep1']);
192198
is($result,"rep1|f|t|lost|",
193199
'check that the slot became inactive and the state "lost" persists');
194200

201+
# The invalidated slot shouldn't keep the old-segment horizon back;
202+
# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
203+
# Test for this by creating a new slot and comparing its restart LSN
204+
# to the oldest existing file.
205+
my$redoseg =$node_primary->safe_psql('postgres',
206+
"SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)"
207+
);
208+
my$oldestseg =$node_primary->safe_psql('postgres',
209+
"SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1"
210+
);
211+
$node_primary->safe_psql('postgres',
212+
qq[SELECT pg_drop_replication_slot('s2')]);
213+
is($oldestseg,$redoseg,"check that segments have been removed");
214+
195215
# The standby no longer can connect to the primary
196216
$logstart = get_log_size($node_standby);
197217
$node_standby->start;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp