Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit76b7872

Browse files
author
Amit Kapila
committed
Add slotsync skip statistics.
This patch adds two new columns to the pg_stat_replication_slots view:
slotsync_skip_count - the total number of times a slotsync operation was skipped.
slotsync_skip_at - the timestamp of the most recent skip.

These additions provide better visibility into replication slot synchronization behavior.

A future patch will introduce the slotsync_skip_reason column in pg_replication_slots to capture the reason for skip.

Author: Shlok Kyal <shlok.kyal.oss@gmail.com>
Reviewed-by: shveta malik <shveta.malik@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: Ashutosh Sharma <ashu.coek88@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Discussion: https://postgr.es/m/CAE9k0PkhfKrTEAsGz4DjOhEj1nQ+hbQVfvWUxNacD38ibW3a1g@mail.gmail.com
1 parent c581c9a commit 76b7872

File tree

11 files changed

+212
-42
lines changed

11 files changed

+212
-42
lines changed

‎contrib/test_decoding/expected/stats.out‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,17 @@ SELECT slot_name, spill_txns = 0 AS spill_txns, spill_count = 0 AS spill_count,
7878

7979
-- verify accessing/resetting stats for non-existent slot does something reasonable
8080
SELECT * FROM pg_stat_get_replication_slot('do-not-exist');
81-
slot_name | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | mem_exceeded_count | total_txns | total_bytes | stats_reset
82-
--------------+------------+-------------+-------------+-------------+--------------+--------------+--------------------+------------+-------------+-------------
83-
do-not-exist | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
81+
slot_name | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | mem_exceeded_count | total_txns | total_bytes |slotsync_skip_count | slotsync_skip_at |stats_reset
82+
--------------+------------+-------------+-------------+-------------+--------------+--------------+--------------------+------------+-------------+---------------------+------------------+-------------
83+
do-not-exist | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
8484
(1 row)
8585

8686
SELECT pg_stat_reset_replication_slot('do-not-exist');
8787
ERROR: replication slot "do-not-exist" does not exist
8888
SELECT * FROM pg_stat_get_replication_slot('do-not-exist');
89-
slot_name | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | mem_exceeded_count | total_txns | total_bytes | stats_reset
90-
--------------+------------+-------------+-------------+-------------+--------------+--------------+--------------------+------------+-------------+-------------
91-
do-not-exist | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
89+
slot_name | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | mem_exceeded_count | total_txns | total_bytes |slotsync_skip_count | slotsync_skip_at |stats_reset
90+
--------------+------------+-------------+-------------+-------------+--------------+--------------+--------------------+------------+-------------+---------------------+------------------+-------------
91+
do-not-exist | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
9292
(1 row)
9393

9494
-- spilling the xact

‎doc/src/sgml/monitoring.sgml‎

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,6 +1659,30 @@ description | Waiting for a newly initialized WAL file to reach durable storage
16591659
</entry>
16601660
</row>
16611661

1662+
<row>
1663+
<entry role="catalog_table_entry"><para role="column_definition">
1664+
<structfield>slotsync_skip_count</structfield><type>bigint</type>
1665+
</para>
1666+
<para>
1667+
Number of times the slot synchronization is skipped. Slot
1668+
synchronization occurs only on standby servers and thus this column has
1669+
no meaning on the primary server.
1670+
</para>
1671+
</entry>
1672+
</row>
1673+
1674+
<row>
1675+
<entry role="catalog_table_entry"><para role="column_definition">
1676+
<structfield>slotsync_skip_at</structfield><type>timestamp with time zone</type>
1677+
</para>
1678+
<para>
1679+
Time at which last slot synchronization was skipped. Slot
1680+
synchronization occurs only on standby servers and thus this column has
1681+
no meaning on the primary server.
1682+
</para>
1683+
</entry>
1684+
</row>
1685+
16621686
<row>
16631687
<entry role="catalog_table_entry"><para role="column_definition">
16641688
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>

‎src/backend/catalog/system_views.sql‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,8 @@ CREATE VIEW pg_stat_replication_slots AS
10761076
s.mem_exceeded_count,
10771077
s.total_txns,
10781078
s.total_bytes,
1079+
s.slotsync_skip_count,
1080+
s.slotsync_skip_at,
10791081
s.stats_reset
10801082
FROM pg_replication_slotsas r,
10811083
LATERAL pg_stat_get_replication_slot(slot_name)as s

‎src/backend/replication/logical/slotsync.c‎

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
187187
TransactionIdPrecedes(remote_slot->catalog_xmin,
188188
slot->data.catalog_xmin))
189189
{
190+
/* Update slot sync skip stats */
191+
pgstat_report_replslotsync(slot);
192+
190193
/*
191194
* This can happen in following situations:
192195
*
@@ -277,6 +280,13 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
277280
errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
278281
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
279282
LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
283+
284+
/*
285+
* If we can't reach a consistent snapshot, the slot won't be
286+
* persisted. See update_and_persist_local_synced_slot().
287+
*/
288+
if (found_consistent_snapshot&& !(*found_consistent_snapshot))
289+
pgstat_report_replslotsync(slot);
280290
}
281291

282292
updated_xmin_or_lsn= true;
@@ -563,6 +573,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
563573
boolfound_consistent_snapshot= false;
564574
boolremote_slot_precedes= false;
565575

576+
/* Slotsync skip stats are handled in function update_local_synced_slot() */
566577
(void)update_local_synced_slot(remote_slot,remote_dbid,
567578
&found_consistent_snapshot,
568579
&remote_slot_precedes);
@@ -624,31 +635,9 @@ static bool
624635
synchronize_one_slot(RemoteSlot*remote_slot,Oidremote_dbid)
625636
{
626637
ReplicationSlot*slot;
627-
XLogRecPtrlatestFlushPtr;
638+
XLogRecPtrlatestFlushPtr=GetStandbyFlushRecPtr(NULL);
628639
boolslot_updated= false;
629640

630-
/*
631-
* Make sure that concerned WAL is received and flushed before syncing
632-
* slot to target lsn received from the primary server.
633-
*/
634-
latestFlushPtr=GetStandbyFlushRecPtr(NULL);
635-
if (remote_slot->confirmed_lsn>latestFlushPtr)
636-
{
637-
/*
638-
* Can get here only if GUC 'synchronized_standby_slots' on the
639-
* primary server was not configured correctly.
640-
*/
641-
ereport(AmLogicalSlotSyncWorkerProcess() ?LOG :ERROR,
642-
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
643-
errmsg("skipping slot synchronization because the received slot sync"
644-
" LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
645-
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
646-
remote_slot->name,
647-
LSN_FORMAT_ARGS(latestFlushPtr)));
648-
649-
return false;
650-
}
651-
652641
/* Search for the named slot */
653642
if ((slot=SearchNamedReplicationSlot(remote_slot->name, true)))
654643
{
@@ -707,10 +696,38 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
707696
/* Skip the sync of an invalidated slot */
708697
if (slot->data.invalidated!=RS_INVAL_NONE)
709698
{
699+
pgstat_report_replslotsync(slot);
700+
710701
ReplicationSlotRelease();
711702
returnslot_updated;
712703
}
713704

705+
/*
706+
* Make sure that concerned WAL is received and flushed before syncing
707+
* slot to target lsn received from the primary server.
708+
*
709+
* Report statistics only after the slot has been acquired, ensuring
710+
* it cannot be dropped during the reporting process.
711+
*/
712+
if (remote_slot->confirmed_lsn>latestFlushPtr)
713+
{
714+
pgstat_report_replslotsync(slot);
715+
716+
/*
717+
* Can get here only if GUC 'synchronized_standby_slots' on the
718+
* primary server was not configured correctly.
719+
*/
720+
ereport(AmLogicalSlotSyncWorkerProcess() ?LOG :ERROR,
721+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
722+
errmsg("skipping slot synchronization because the received slot sync"
723+
" LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
724+
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
725+
remote_slot->name,
726+
LSN_FORMAT_ARGS(latestFlushPtr)));
727+
728+
returnslot_updated;
729+
}
730+
714731
/* Slot not ready yet, let's attempt to make it sync-ready now. */
715732
if (slot->data.persistency==RS_TEMPORARY)
716733
{
@@ -784,6 +801,32 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
784801
ReplicationSlotsComputeRequiredXmin(true);
785802
LWLockRelease(ProcArrayLock);
786803

804+
/*
805+
* Make sure that concerned WAL is received and flushed before syncing
806+
* slot to target lsn received from the primary server.
807+
*
808+
* Report statistics only after the slot has been acquired, ensuring
809+
* it cannot be dropped during the reporting process.
810+
*/
811+
if (remote_slot->confirmed_lsn>latestFlushPtr)
812+
{
813+
pgstat_report_replslotsync(slot);
814+
815+
/*
816+
* Can get here only if GUC 'synchronized_standby_slots' on the
817+
* primary server was not configured correctly.
818+
*/
819+
ereport(AmLogicalSlotSyncWorkerProcess() ?LOG :ERROR,
820+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
821+
errmsg("skipping slot synchronization because the received slot sync"
822+
" LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
823+
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
824+
remote_slot->name,
825+
LSN_FORMAT_ARGS(latestFlushPtr)));
826+
827+
return false;
828+
}
829+
787830
update_and_persist_local_synced_slot(remote_slot,remote_dbid);
788831

789832
slot_updated= true;

‎src/backend/utils/activity/pgstat_replslot.c‎

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,36 @@ pgstat_report_replslot(ReplicationSlot *slot, const PgStat_StatReplSlotEntry *re
102102
pgstat_unlock_entry(entry_ref);
103103
}
104104

105+
/*
106+
* Report replication slot sync skip statistics.
107+
*
108+
* Similar to pgstat_report_replslot(), we can rely on the stats for the
109+
* slot to exist and to belong to this slot.
110+
*/
111+
void
112+
pgstat_report_replslotsync(ReplicationSlot*slot)
113+
{
114+
PgStat_EntryRef*entry_ref;
115+
PgStatShared_ReplSlot*shstatent;
116+
PgStat_StatReplSlotEntry*statent;
117+
118+
/* Slot sync stats are valid only for logical slots on standby. */
119+
Assert(SlotIsLogical(slot));
120+
Assert(RecoveryInProgress());
121+
122+
entry_ref=pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT,InvalidOid,
123+
ReplicationSlotIndex(slot), false);
124+
Assert(entry_ref!=NULL);
125+
126+
shstatent= (PgStatShared_ReplSlot*)entry_ref->shared_stats;
127+
statent=&shstatent->stats;
128+
129+
statent->slotsync_skip_count+=1;
130+
statent->slotsync_skip_at=GetCurrentTimestamp();
131+
132+
pgstat_unlock_entry(entry_ref);
133+
}
134+
105135
/*
106136
* Report replication slot creation.
107137
*
@@ -133,7 +163,7 @@ pgstat_create_replslot(ReplicationSlot *slot)
133163
* Report replication slot has been acquired.
134164
*
135165
* This guarantees that a stats entry exists during later
136-
* pgstat_report_replslot() calls.
166+
* pgstat_report_replslot() or pgstat_report_replslotsync() calls.
137167
*
138168
* If we previously crashed, no stats data exists. But if we did not crash,
139169
* the stats do belong to this slot:

‎src/backend/utils/adt/pgstatfuncs.c‎

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,7 +2129,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
21292129
Datum
21302130
pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
21312131
{
2132-
#definePG_STAT_GET_REPLICATION_SLOT_COLS11
2132+
#definePG_STAT_GET_REPLICATION_SLOT_COLS13
21332133
text*slotname_text=PG_GETARG_TEXT_P(0);
21342134
NameDataslotname;
21352135
TupleDesctupdesc;
@@ -2160,7 +2160,11 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
21602160
INT8OID,-1,0);
21612161
TupleDescInitEntry(tupdesc, (AttrNumber)10,"total_bytes",
21622162
INT8OID,-1,0);
2163-
TupleDescInitEntry(tupdesc, (AttrNumber)11,"stats_reset",
2163+
TupleDescInitEntry(tupdesc, (AttrNumber)11,"slotsync_skip_count",
2164+
INT8OID,-1,0);
2165+
TupleDescInitEntry(tupdesc, (AttrNumber)12,"slotsync_skip_at",
2166+
TIMESTAMPTZOID,-1,0);
2167+
TupleDescInitEntry(tupdesc, (AttrNumber)13,"stats_reset",
21642168
TIMESTAMPTZOID,-1,0);
21652169
BlessTupleDesc(tupdesc);
21662170

@@ -2186,11 +2190,17 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
21862190
values[7]=Int64GetDatum(slotent->mem_exceeded_count);
21872191
values[8]=Int64GetDatum(slotent->total_txns);
21882192
values[9]=Int64GetDatum(slotent->total_bytes);
2193+
values[10]=Int64GetDatum(slotent->slotsync_skip_count);
2194+
2195+
if (slotent->slotsync_skip_at==0)
2196+
nulls[11]= true;
2197+
else
2198+
values[11]=TimestampTzGetDatum(slotent->slotsync_skip_at);
21892199

21902200
if (slotent->stat_reset_timestamp==0)
2191-
nulls[10]= true;
2201+
nulls[12]= true;
21922202
else
2193-
values[10]=TimestampTzGetDatum(slotent->stat_reset_timestamp);
2203+
values[12]=TimestampTzGetDatum(slotent->stat_reset_timestamp);
21942204

21952205
/* Returns the record as Datum */
21962206
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc,values,nulls)));

‎src/include/catalog/catversion.h‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/*yyyymmddN */
60-
#defineCATALOG_VERSION_NO202511221
60+
#defineCATALOG_VERSION_NO202511251
6161

6262
#endif

‎src/include/catalog/pg_proc.dat‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5691,9 +5691,9 @@
56915691
{ oid => '6169', descr => 'statistics: information about replication slot',
56925692
proname => 'pg_stat_get_replication_slot', provolatile => 's',
56935693
proparallel => 'r', prorettype => 'record', proargtypes => 'text',
5694-
proallargtypes => '{text,text,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
5695-
proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o}',
5696-
proargnames => '{slot_name,slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,mem_exceeded_count,total_txns,total_bytes,stats_reset}',
5694+
proallargtypes => '{text,text,int8,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz,timestamptz}',
5695+
proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o}',
5696+
proargnames => '{slot_name,slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,mem_exceeded_count,total_txns,total_bytes,slotsync_skip_count,slotsync_skip_at,stats_reset}',
56975697
prosrc => 'pg_stat_get_replication_slot' },
56985698

56995699
{ oid => '6230', descr => 'statistics: check if a stats object exists',

‎src/include/pgstat.h‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ typedef struct PgStat_TableXactStatus
214214
* ------------------------------------------------------------
215215
*/
216216

217-
#definePGSTAT_FILE_FORMAT_ID0x01A5BCBA
217+
#definePGSTAT_FILE_FORMAT_ID0x01A5BCBB
218218

219219
typedefstructPgStat_ArchiverStats
220220
{
@@ -400,6 +400,8 @@ typedef struct PgStat_StatReplSlotEntry
400400
PgStat_Countermem_exceeded_count;
401401
PgStat_Countertotal_txns;
402402
PgStat_Countertotal_bytes;
403+
PgStat_Counterslotsync_skip_count;
404+
TimestampTzslotsync_skip_at;
403405
TimestampTzstat_reset_timestamp;
404406
}PgStat_StatReplSlotEntry;
405407

@@ -745,6 +747,7 @@ extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
745747
externvoidpgstat_reset_replslot(constchar*name);
746748
structReplicationSlot;
747749
externvoidpgstat_report_replslot(structReplicationSlot*slot,constPgStat_StatReplSlotEntry*repSlotStat);
750+
externvoidpgstat_report_replslotsync(structReplicationSlot*slot);
748751
externvoidpgstat_create_replslot(structReplicationSlot*slot);
749752
externvoidpgstat_acquire_replslot(structReplicationSlot*slot);
750753
externvoidpgstat_drop_replslot(structReplicationSlot*slot);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp