Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf88bd31

Browse files
committed
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside aspinlock" rule. These are hazardous not only because they riskholding the lock for an excessively long time, but because it'spossible for palloc to throw elog(ERROR), leaving a stuck spinlockbehind.copy_replication_slot() had two separate places that did pallocswhile holding a spinlock. We can make the code simpler and saferby copying the whole ReplicationSlot struct into a local variablewhile holding the spinlock, and then referencing that copy.(While that's arguably more cycles than we really need to spendholding the lock, the struct isn't all that big, and this way seemsfar more maintainable than copying fields piecemeal. Anyway thisis surely much cheaper than a palloc.) That bug goes back to v12.InvalidateObsoleteReplicationSlots() not only did a palloc whileholding a spinlock, but for extra sloppiness then leaked the memory--- probably for the lifetime of the checkpointer process, thoughI didn't try to verify that. Fortunately that silliness is newin HEAD.pg_get_replication_slots() had a cosmetic violation of the rule,in that it only assumed it's safe to call namecpy() while holdinga spinlock. Still, that's a hazard waiting to bite somebody, andthere were some other cosmetic coding-rule violations in the samefunction, so clean it up. I back-patched this as far as v10; thecode exists before that but it looks different, and this didn'tseem important enough to adapt the patch further back.Discussion:https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
1 parent4d685f6 commitf88bd31

File tree

3 files changed

+57
-61
lines changed

3 files changed

+57
-61
lines changed

‎src/backend/replication/slot.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,7 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
10991099
{
11001100
ReplicationSlot*s=&ReplicationSlotCtl->replication_slots[i];
11011101
XLogRecPtrrestart_lsn=InvalidXLogRecPtr;
1102-
char*slotname;
1102+
NameDataslotname;
11031103

11041104
if (!s->in_use)
11051105
continue;
@@ -1112,23 +1112,24 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
11121112
continue;
11131113
}
11141114

1115-
slotname=pstrdup(NameStr(s->data.name));
1115+
slotname=s->data.name;
11161116
restart_lsn=s->data.restart_lsn;
11171117

11181118
SpinLockRelease(&s->mutex);
11191119
LWLockRelease(ReplicationSlotControlLock);
11201120

11211121
for (;;)
11221122
{
1123-
intwspid=ReplicationSlotAcquire(slotname,SAB_Inquire);
1123+
intwspid=ReplicationSlotAcquire(NameStr(slotname),
1124+
SAB_Inquire);
11241125

11251126
/* no walsender? success! */
11261127
if (wspid==0)
11271128
break;
11281129

11291130
ereport(LOG,
11301131
(errmsg("terminating walsender %d because replication slot \"%s\" is too far behind",
1131-
wspid,slotname)));
1132+
wspid,NameStr(slotname))));
11321133
(void)kill(wspid,SIGTERM);
11331134

11341135
ConditionVariableTimedSleep(&s->active_cv,10,
@@ -1138,7 +1139,7 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
11381139

11391140
ereport(LOG,
11401141
(errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
1141-
slotname,
1142+
NameStr(slotname),
11421143
(uint32) (restart_lsn >>32),
11431144
(uint32)restart_lsn)));
11441145

‎src/backend/replication/slotfuncs.c

Lines changed: 49 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -278,88 +278,71 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
278278
for (slotno=0;slotno<max_replication_slots;slotno++)
279279
{
280280
ReplicationSlot*slot=&ReplicationSlotCtl->replication_slots[slotno];
281+
ReplicationSlotslot_contents;
281282
Datumvalues[PG_GET_REPLICATION_SLOTS_COLS];
282283
boolnulls[PG_GET_REPLICATION_SLOTS_COLS];
283-
284-
ReplicationSlotPersistencypersistency;
285-
TransactionIdxmin;
286-
TransactionIdcatalog_xmin;
287-
XLogRecPtrrestart_lsn;
288-
XLogRecPtrconfirmed_flush_lsn;
289-
pid_tactive_pid;
290-
Oiddatabase;
291-
NameDataslot_name;
292-
NameDataplugin;
293284
WALAvailabilitywalstate;
294285
XLogSegNolast_removed_seg;
295286
inti;
296287

297288
if (!slot->in_use)
298289
continue;
299290

291+
/* Copy slot contents while holding spinlock, then examine at leisure */
300292
SpinLockAcquire(&slot->mutex);
301-
302-
xmin=slot->data.xmin;
303-
catalog_xmin=slot->data.catalog_xmin;
304-
database=slot->data.database;
305-
restart_lsn=slot->data.restart_lsn;
306-
confirmed_flush_lsn=slot->data.confirmed_flush;
307-
namecpy(&slot_name,&slot->data.name);
308-
namecpy(&plugin,&slot->data.plugin);
309-
active_pid=slot->active_pid;
310-
persistency=slot->data.persistency;
311-
293+
slot_contents=*slot;
312294
SpinLockRelease(&slot->mutex);
313295

296+
memset(values,0,sizeof(values));
314297
memset(nulls,0,sizeof(nulls));
315298

316299
i=0;
317-
values[i++]=NameGetDatum(&slot_name);
300+
values[i++]=NameGetDatum(&slot_contents.data.name);
318301

319-
if (database==InvalidOid)
302+
if (slot_contents.data.database==InvalidOid)
320303
nulls[i++]= true;
321304
else
322-
values[i++]=NameGetDatum(&plugin);
305+
values[i++]=NameGetDatum(&slot_contents.data.plugin);
323306

324-
if (database==InvalidOid)
307+
if (slot_contents.data.database==InvalidOid)
325308
values[i++]=CStringGetTextDatum("physical");
326309
else
327310
values[i++]=CStringGetTextDatum("logical");
328311

329-
if (database==InvalidOid)
312+
if (slot_contents.data.database==InvalidOid)
330313
nulls[i++]= true;
331314
else
332-
values[i++]=database;
315+
values[i++]=ObjectIdGetDatum(slot_contents.data.database);
333316

334-
values[i++]=BoolGetDatum(persistency==RS_TEMPORARY);
335-
values[i++]=BoolGetDatum(active_pid!=0);
317+
values[i++]=BoolGetDatum(slot_contents.data.persistency==RS_TEMPORARY);
318+
values[i++]=BoolGetDatum(slot_contents.active_pid!=0);
336319

337-
if (active_pid!=0)
338-
values[i++]=Int32GetDatum(active_pid);
320+
if (slot_contents.active_pid!=0)
321+
values[i++]=Int32GetDatum(slot_contents.active_pid);
339322
else
340323
nulls[i++]= true;
341324

342-
if (xmin!=InvalidTransactionId)
343-
values[i++]=TransactionIdGetDatum(xmin);
325+
if (slot_contents.data.xmin!=InvalidTransactionId)
326+
values[i++]=TransactionIdGetDatum(slot_contents.data.xmin);
344327
else
345328
nulls[i++]= true;
346329

347-
if (catalog_xmin!=InvalidTransactionId)
348-
values[i++]=TransactionIdGetDatum(catalog_xmin);
330+
if (slot_contents.data.catalog_xmin!=InvalidTransactionId)
331+
values[i++]=TransactionIdGetDatum(slot_contents.data.catalog_xmin);
349332
else
350333
nulls[i++]= true;
351334

352-
if (restart_lsn!=InvalidXLogRecPtr)
353-
values[i++]=LSNGetDatum(restart_lsn);
335+
if (slot_contents.data.restart_lsn!=InvalidXLogRecPtr)
336+
values[i++]=LSNGetDatum(slot_contents.data.restart_lsn);
354337
else
355338
nulls[i++]= true;
356339

357-
if (confirmed_flush_lsn!=InvalidXLogRecPtr)
358-
values[i++]=LSNGetDatum(confirmed_flush_lsn);
340+
if (slot_contents.data.confirmed_flush!=InvalidXLogRecPtr)
341+
values[i++]=LSNGetDatum(slot_contents.data.confirmed_flush);
359342
else
360343
nulls[i++]= true;
361344

362-
walstate=GetWALAvailability(restart_lsn);
345+
walstate=GetWALAvailability(slot_contents.data.restart_lsn);
363346

364347
switch (walstate)
365348
{
@@ -378,6 +361,9 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
378361
caseWALAVAIL_REMOVED:
379362
values[i++]=CStringGetTextDatum("lost");
380363
break;
364+
365+
default:
366+
elog(ERROR,"invalid walstate: %d", (int)walstate);
381367
}
382368

383369
if (max_slot_wal_keep_size_mb >=0&&
@@ -393,8 +379,11 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
393379
else
394380
nulls[i++]= true;
395381

382+
Assert(i==PG_GET_REPLICATION_SLOTS_COLS);
383+
396384
tuplestore_putvalues(tupstore,tupdesc,values,nulls);
397385
}
386+
398387
LWLockRelease(ReplicationSlotControlLock);
399388

400389
tuplestore_donestoring(tupstore);
@@ -653,6 +642,8 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
653642
Namesrc_name=PG_GETARG_NAME(0);
654643
Namedst_name=PG_GETARG_NAME(1);
655644
ReplicationSlot*src=NULL;
645+
ReplicationSlotfirst_slot_contents;
646+
ReplicationSlotsecond_slot_contents;
656647
XLogRecPtrsrc_restart_lsn;
657648
boolsrc_islogical;
658649
booltemporary;
@@ -692,13 +683,10 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
692683

693684
if (s->in_use&&strcmp(NameStr(s->data.name),NameStr(*src_name))==0)
694685
{
686+
/* Copy the slot contents while holding spinlock */
695687
SpinLockAcquire(&s->mutex);
696-
src_islogical=SlotIsLogical(s);
697-
src_restart_lsn=s->data.restart_lsn;
698-
temporary=s->data.persistency==RS_TEMPORARY;
699-
plugin=logical_slot ?pstrdup(NameStr(s->data.plugin)) :NULL;
688+
first_slot_contents=*s;
700689
SpinLockRelease(&s->mutex);
701-
702690
src=s;
703691
break;
704692
}
@@ -711,6 +699,11 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
711699
(errcode(ERRCODE_UNDEFINED_OBJECT),
712700
errmsg("replication slot \"%s\" does not exist",NameStr(*src_name))));
713701

702+
src_islogical=SlotIsLogical(&first_slot_contents);
703+
src_restart_lsn=first_slot_contents.data.restart_lsn;
704+
temporary= (first_slot_contents.data.persistency==RS_TEMPORARY);
705+
plugin=logical_slot ?NameStr(first_slot_contents.data.plugin) :NULL;
706+
714707
/* Check type of replication slot */
715708
if (src_islogical!=logical_slot)
716709
ereport(ERROR,
@@ -775,18 +768,20 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
775768

776769
/* Copy data of source slot again */
777770
SpinLockAcquire(&src->mutex);
778-
copy_effective_xmin=src->effective_xmin;
779-
copy_effective_catalog_xmin=src->effective_catalog_xmin;
771+
second_slot_contents=*src;
772+
SpinLockRelease(&src->mutex);
780773

781-
copy_xmin=src->data.xmin;
782-
copy_catalog_xmin=src->data.catalog_xmin;
783-
copy_restart_lsn=src->data.restart_lsn;
784-
copy_confirmed_flush=src->data.confirmed_flush;
774+
copy_effective_xmin=second_slot_contents.effective_xmin;
775+
copy_effective_catalog_xmin=second_slot_contents.effective_catalog_xmin;
776+
777+
copy_xmin=second_slot_contents.data.xmin;
778+
copy_catalog_xmin=second_slot_contents.data.catalog_xmin;
779+
copy_restart_lsn=second_slot_contents.data.restart_lsn;
780+
copy_confirmed_flush=second_slot_contents.data.confirmed_flush;
785781

786782
/* for existence check */
787-
copy_name=pstrdup(NameStr(src->data.name));
788-
copy_islogical=SlotIsLogical(src);
789-
SpinLockRelease(&src->mutex);
783+
copy_name=NameStr(second_slot_contents.data.name);
784+
copy_islogical=SlotIsLogical(&second_slot_contents);
790785

791786
/*
792787
* Check if the source slot still exists and is valid. We regard it as

‎src/include/replication/slot.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ typedef struct ReplicationSlot
158158
XLogRecPtrcandidate_restart_lsn;
159159
}ReplicationSlot;
160160

161-
#defineSlotIsPhysical(slot) (slot->data.database == InvalidOid)
162-
#defineSlotIsLogical(slot) (slot->data.database != InvalidOid)
161+
#defineSlotIsPhysical(slot) ((slot)->data.database == InvalidOid)
162+
#defineSlotIsLogical(slot) ((slot)->data.database != InvalidOid)
163163

164164
/*
165165
* Shared memory control area for all of replication slots.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp