Commit 3f8148c

Revert 019_replslot_limit.pl related debugging aids.
This reverts most of 91c0570, f28bf66, fe0972e, afdeff1. The only thing left
is the retry loop in 019_replslot_limit.pl that avoids spurious failures by
retrying a couple times.

We haven't seen any hard evidence that this is caused by anything but slow
process shutdown. We did not find any cases where walsenders did not vanish
after waiting for longer. Therefore there's no reason for this debugging code
to remain.

Discussion: https://postgr.es/m/20220530190155.47wr3x2prdwyciah@alap3.anarazel.de
Backpatch: 15-
1 parent b9eb0ff · commit 3f8148c

File tree

6 files changed: +5 / -87 lines changed


‎src/backend/replication/slot.c

Lines changed: 0 additions & 43 deletions

@@ -177,10 +177,6 @@ ReplicationSlotInitialize(void)
 static void
 ReplicationSlotShmemExit(int code, Datum arg)
 {
-    /* temp debugging aid to analyze 019_replslot_limit failures */
-    elog(DEBUG3, "replication slot exit hook, %s active slot",
-         MyReplicationSlot != NULL ? "with" : "without");
-
     /* Make sure active replication slots are released */
     if (MyReplicationSlot != NULL)
         ReplicationSlotRelease();
@@ -582,9 +578,6 @@ ReplicationSlotCleanup(void)
     Assert(MyReplicationSlot == NULL);

 restart:
-    /* temp debugging aid to analyze 019_replslot_limit failures */
-    elog(DEBUG3, "temporary replication slot cleanup: begin");
-
     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     for (i = 0; i < max_replication_slots; i++)
     {
@@ -593,10 +586,6 @@ ReplicationSlotCleanup(void)
         if (!s->in_use)
             continue;

-        /* unlocked read of active_pid is ok for debugging purposes */
-        elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d",
-             i, (int) s->active_pid);
-
         SpinLockAcquire(&s->mutex);
         if (s->active_pid == MyProcPid)
         {
@@ -614,8 +603,6 @@ ReplicationSlotCleanup(void)
     }

     LWLockRelease(ReplicationSlotControlLock);
-
-    elog(DEBUG3, "temporary replication slot cleanup: done");
 }

 /*
@@ -657,9 +644,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
     char        path[MAXPGPATH];
     char        tmppath[MAXPGPATH];

-    /* temp debugging aid to analyze 019_replslot_limit failures */
-    elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name));
-
     /*
      * If some other backend ran this code concurrently with us, we might try
      * to delete a slot with a certain name while someone else was trying to
@@ -710,9 +694,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
                         path, tmppath)));
     }

-    elog(DEBUG3, "replication slot drop: %s: removed on-disk",
-         NameStr(slot->data.name));
-
     /*
      * The slot is definitely gone.  Lock out concurrent scans of the array
      * long enough to kill it.  It's OK to clear the active PID here without
@@ -726,22 +707,15 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
     slot->active_pid = 0;
     slot->in_use = false;
     LWLockRelease(ReplicationSlotControlLock);
-
-    elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
-
     ConditionVariableBroadcast(&slot->active_cv);

-    elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
-
     /*
      * Slot is dead and doesn't prevent resource removal anymore, recompute
      * limits.
      */
     ReplicationSlotsComputeRequiredXmin(false);
     ReplicationSlotsComputeRequiredLSN();

-    elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
-
     /*
      * If removing the directory fails, the worst thing that will happen is
      * that the user won't be able to create a new slot with the same name
@@ -751,8 +725,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
         ereport(WARNING,
                 (errmsg("could not remove directory \"%s\"", tmppath)));

-    elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
-
     /*
      * Drop the statistics entry for the replication slot.  Do this while
      * holding ReplicationSlotAllocationLock so that we don't drop a
@@ -767,9 +739,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
      * a slot while we're still cleaning up the detritus of the old one.
      */
     LWLockRelease(ReplicationSlotAllocationLock);
-
-    elog(DEBUG3, "replication slot drop: %s: done",
-         NameStr(slot->data.name));
 }

 /*
@@ -1329,12 +1298,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
             (void) kill(active_pid, SIGTERM);
             last_signaled_pid = active_pid;
         }
-        else
-        {
-            /* temp debugging aid to analyze 019_replslot_limit failures */
-            elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"",
-                 active_pid, NameStr(slotname));
-        }

         /* Wait until the slot is released. */
         ConditionVariableSleep(&s->active_cv,
@@ -1398,10 +1361,6 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
     XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);

 restart:
-    /* temp debugging aid to analyze 019_replslot_limit failures */
-    elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X",
-         LSN_FORMAT_ARGS(oldestLSN));
-
     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     for (int i = 0; i < max_replication_slots; i++)
     {
@@ -1427,8 +1386,6 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
         ReplicationSlotsComputeRequiredLSN();
     }

-    elog(DEBUG3, "done invalidating obsolete replication slots");
-
     return invalidated;
 }

‎src/backend/storage/lmgr/lwlock.c

Lines changed: 0 additions & 7 deletions

@@ -1942,10 +1942,3 @@ LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
     }
     return false;
 }
-
-/* temp debugging aid to analyze 019_replslot_limit failures */
-int
-LWLockHeldCount(void)
-{
-    return num_held_lwlocks;
-}

‎src/backend/utils/init/postinit.c

Lines changed: 0 additions & 18 deletions

@@ -1231,24 +1231,6 @@ ShutdownPostgres(int code, Datum arg)
      * them explicitly.
      */
     LockReleaseAll(USER_LOCKMETHOD, true);
-
-    /*
-     * temp debugging aid to analyze 019_replslot_limit failures
-     *
-     * If an error were thrown outside of a transaction nothing up to now
-     * would have released lwlocks. We probably will add an
-     * LWLockReleaseAll(). But for now make it easier to understand such cases
-     * by warning if any lwlocks are held.
-     */
-#ifdef USE_ASSERT_CHECKING
-    {
-        int         held_lwlocks = LWLockHeldCount();
-
-        if (held_lwlocks)
-            elog(WARNING, "holding %d lwlocks at the end of ShutdownPostgres()",
-                 held_lwlocks);
-    }
-#endif
 }

‎src/bin/pg_basebackup/pg_basebackup.c

Lines changed: 1 addition & 9 deletions

@@ -694,16 +694,8 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
     bgchild = fork();
     if (bgchild == 0)
     {
-        int         ret;
-
         /* in child process */
-        ret = LogStreamerMain(param);
-
-        /* temp debugging aid to analyze 019_replslot_limit failures */
-        if (verbose)
-            pg_log_info("log streamer with pid %d exiting", getpid());
-
-        exit(ret);
+        exit(LogStreamerMain(param));
     }
     else if (bgchild < 0)
         pg_fatal("could not create background process: %m");

‎src/include/storage/lwlock.h

Lines changed: 0 additions & 1 deletion

@@ -121,7 +121,6 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
 extern void LWLockReleaseAll(void);
 extern bool LWLockHeldByMe(LWLock *lock);
 extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
-extern int  LWLockHeldCount(void);

 extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
 extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);

‎src/test/recovery/t/019_replslot_limit.pl

Lines changed: 4 additions & 9 deletions

@@ -316,16 +316,13 @@
 max_wal_size = 2MB
 log_checkpoints = yes
 max_slot_wal_keep_size = 1MB
-
-# temp debugging aid to analyze 019_replslot_limit failures
-log_min_messages = debug3
 ));
 $node_primary3->start;
 $node_primary3->safe_psql('postgres',
     "SELECT pg_create_physical_replication_slot('rep3')");
 # Take backup
 $backup_name = 'my_backup';
-$node_primary3->backup($backup_name, backup_options => ['--verbose']);
+$node_primary3->backup($backup_name);
 # Create standby
 my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
 $node_standby3->init_from_backup($node_primary3, $backup_name,
@@ -336,11 +333,9 @@

 my $senderpid;

-# We've seen occasional cases where multiple walsender pids are active. It
-# could be that we're just observing process shutdown being slow. To collect
-# more information, retry a couple times, print a bit of debugging information
-# each iteration. Don't fail the test if retries find just one pid, the
-# buildfarm failures are too noisy.
+# We've seen occasional cases where multiple walsender pids are still active
+# at this point, apparently just due to process shutdown being slow. To avoid
+# spurious failures, retry a couple times.
 my $i = 0;
 while (1)
 {
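
The hunk above cuts off at the top of the retry loop that the commit keeps. As a rough
illustration of that pattern (poll for the remaining walsender pid and retry a bounded
number of times instead of failing immediately), a minimal sketch could look like the
following; the pg_stat_activity query, the retry bound, and the sleep interval are
assumptions for illustration, not the exact test code:

    # Illustrative sketch only -- not the exact 019_replslot_limit.pl code.
    # Assumes $node_primary3 is the primary PostgreSQL::Test::Cluster node
    # set up earlier in the test.
    use Time::HiRes qw(usleep);

    my $senderpid;
    my $i = 0;
    while (1)
    {
        # Ask the primary which walsender backends are currently running.
        $senderpid = $node_primary3->safe_psql('postgres',
            "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");

        # Done once exactly one walsender pid is reported.
        last if $senderpid =~ qr/^[0-9]+$/;

        # Otherwise assume an old walsender is still shutting down slowly;
        # retry a few times rather than failing the test outright.
        last if $i++ > 10;
        usleep(100_000);
    }

Per the commit message, this retry is the only part of the earlier debugging changes
that remains; the DEBUG3 logging and related helpers shown in the other hunks are
removed.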

