Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6b9e875

Browse files
committed
Track block level checksum failures in pg_stat_database
This adds a column that counts how many checksum failures have occurred on files belonging to a specific database. Both checksum failures during normal backend processing and those created when a base backup detects a checksum failure are counted.

Author: Magnus Hagander
Reviewed by: Julien Rouhaud
1 parent 3c59263, commit 6b9e875

File tree

10 files changed

+114
-7
lines changed

10 files changed

+114
-7
lines changed

‎doc/src/sgml/monitoring.sgml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2508,6 +2508,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
25082508
<entry><type>bigint</type></entry>
25092509
<entry>Number of deadlocks detected in this database</entry>
25102510
</row>
2511+
<row>
2512+
<entry><structfield>checksum_failures</structfield></entry>
2513+
<entry><type>bigint</type></entry>
2514+
<entry>Number of data page checksum failures detected in this database</entry>
2515+
</row>
25112516
<row>
25122517
<entry><structfield>blk_read_time</structfield></entry>
25132518
<entry><type>double precision</type></entry>

‎src/backend/catalog/system_views.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,7 @@ CREATE VIEW pg_stat_database AS
823823
pg_stat_get_db_temp_files(D.oid)AS temp_files,
824824
pg_stat_get_db_temp_bytes(D.oid)AS temp_bytes,
825825
pg_stat_get_db_deadlocks(D.oid)AS deadlocks,
826+
pg_stat_get_db_checksum_failures(D.oid)AS checksum_failures,
826827
pg_stat_get_db_blk_read_time(D.oid)AS blk_read_time,
827828
pg_stat_get_db_blk_write_time(D.oid)AS blk_write_time,
828829
pg_stat_get_db_stat_reset_time(D.oid)AS stats_reset

‎src/backend/postmaster/pgstat.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
334334
staticvoidpgstat_recv_funcpurge(PgStat_MsgFuncpurge*msg,intlen);
335335
staticvoidpgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict*msg,intlen);
336336
staticvoidpgstat_recv_deadlock(PgStat_MsgDeadlock*msg,intlen);
337+
staticvoidpgstat_recv_checksum_failure(PgStat_MsgChecksumFailure*msg,intlen);
337338
staticvoidpgstat_recv_tempfile(PgStat_MsgTempFile*msg,intlen);
338339

339340
/* ------------------------------------------------------------
@@ -1518,6 +1519,40 @@ pgstat_report_deadlock(void)
15181519
pgstat_send(&msg,sizeof(msg));
15191520
}
15201521

1522+
1523+
1524+
/* --------
1525+
* pgstat_report_checksum_failures_in_db(dboid, failure_count) -
1526+
*
1527+
*Tell the collector about one or more checksum failures.
1528+
* --------
1529+
*/
1530+
void
1531+
pgstat_report_checksum_failures_in_db(Oiddboid,intfailurecount)
1532+
{
1533+
PgStat_MsgChecksumFailuremsg;
1534+
1535+
if (pgStatSock==PGINVALID_SOCKET|| !pgstat_track_counts)
1536+
return;
1537+
1538+
pgstat_setheader(&msg.m_hdr,PGSTAT_MTYPE_CHECKSUMFAILURE);
1539+
msg.m_databaseid=dboid;
1540+
msg.m_failurecount=failurecount;
1541+
pgstat_send(&msg,sizeof(msg));
1542+
}
1543+
1544+
/* --------
1545+
* pgstat_report_checksum_failure() -
1546+
*
1547+
*Tell the collector about a checksum failure.
1548+
* --------
1549+
*/
1550+
void
1551+
pgstat_report_checksum_failure(void)
1552+
{
1553+
pgstat_report_checksum_failures_in_db(MyDatabaseId,1);
1554+
}
1555+
15211556
/* --------
15221557
* pgstat_report_tempfile() -
15231558
*
@@ -4455,6 +4490,10 @@ PgstatCollectorMain(int argc, char *argv[])
44554490
pgstat_recv_tempfile((PgStat_MsgTempFile*)&msg,len);
44564491
break;
44574492

4493+
casePGSTAT_MTYPE_CHECKSUMFAILURE:
4494+
pgstat_recv_checksum_failure((PgStat_MsgChecksumFailure*)&msg,len);
4495+
break;
4496+
44584497
default:
44594498
break;
44604499
}
@@ -4554,6 +4593,7 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
45544593
dbentry->n_temp_files=0;
45554594
dbentry->n_temp_bytes=0;
45564595
dbentry->n_deadlocks=0;
4596+
dbentry->n_checksum_failures=0;
45574597
dbentry->n_block_read_time=0;
45584598
dbentry->n_block_write_time=0;
45594599

@@ -6196,6 +6236,22 @@ pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len)
61966236
dbentry->n_deadlocks++;
61976237
}
61986238

6239+
/* ----------
6240+
* pgstat_recv_checksum_failure() -
6241+
*
6242+
*Process a CHECKSUMFAILURE message.
6243+
* ----------
6244+
*/
6245+
staticvoid
6246+
pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure*msg,intlen)
6247+
{
6248+
PgStat_StatDBEntry*dbentry;
6249+
6250+
dbentry=pgstat_get_db_entry(msg->m_databaseid, true);
6251+
6252+
dbentry->n_checksum_failures+=msg->m_failurecount;
6253+
}
6254+
61996255
/* ----------
62006256
* pgstat_recv_tempfile() -
62016257
*

‎src/backend/replication/basebackup.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ typedef struct
5858
staticint64sendDir(constchar*path,intbasepathlen,boolsizeonly,
5959
List*tablespaces,boolsendtblspclinks);
6060
staticboolsendFile(constchar*readfilename,constchar*tarfilename,
61-
structstat*statbuf,boolmissing_ok);
61+
structstat*statbuf,boolmissing_ok,Oiddboid);
6262
staticvoidsendFileWithContent(constchar*filename,constchar*content);
6363
staticint64_tarWriteHeader(constchar*filename,constchar*linktarget,
6464
structstat*statbuf,boolsizeonly);
@@ -342,7 +342,7 @@ perform_base_backup(basebackup_options *opt)
342342
(errcode_for_file_access(),
343343
errmsg("could not stat file \"%s\": %m",
344344
XLOG_CONTROL_FILE)));
345-
sendFile(XLOG_CONTROL_FILE,XLOG_CONTROL_FILE,&statbuf, false);
345+
sendFile(XLOG_CONTROL_FILE,XLOG_CONTROL_FILE,&statbuf, false,InvalidOid);
346346
}
347347
else
348348
sendTablespace(ti->path, false);
@@ -592,7 +592,7 @@ perform_base_backup(basebackup_options *opt)
592592
(errcode_for_file_access(),
593593
errmsg("could not stat file \"%s\": %m",pathbuf)));
594594

595-
sendFile(pathbuf,pathbuf,&statbuf, false);
595+
sendFile(pathbuf,pathbuf,&statbuf, false,InvalidOid);
596596

597597
/* unconditionally mark file as archived */
598598
StatusFilePath(pathbuf,fname,".done");
@@ -1302,7 +1302,7 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
13021302

13031303
if (!sizeonly)
13041304
sent=sendFile(pathbuf,pathbuf+basepathlen+1,&statbuf,
1305-
true);
1305+
true,isDbDir ?pg_atoi(lastDir+1,sizeof(Oid),0) :InvalidOid);
13061306

13071307
if (sent||sizeonly)
13081308
{
@@ -1358,12 +1358,15 @@ is_checksummed_file(const char *fullpath, const char *filename)
13581358
*
13591359
* If 'missing_ok' is true, will not throw an error if the file is not found.
13601360
*
1361+
* If dboid is anything other than InvalidOid then any checksum failures detected
1362+
* will get reported to the stats collector.
1363+
*
13611364
* Returns true if the file was successfully sent, false if 'missing_ok',
13621365
* and the file did not exist.
13631366
*/
13641367
staticbool
13651368
sendFile(constchar*readfilename,constchar*tarfilename,structstat*statbuf,
1366-
boolmissing_ok)
1369+
boolmissing_ok,Oiddboid)
13671370
{
13681371
FILE*fp;
13691372
BlockNumberblkno=0;
@@ -1580,6 +1583,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
15801583
ereport(WARNING,
15811584
(errmsg("file \"%s\" has a total of %d checksum verification "
15821585
"failures",readfilename,checksum_failures)));
1586+
1587+
if (dboid!=InvalidOid)
1588+
pgstat_report_checksum_failures_in_db(dboid,checksum_failures);
15831589
}
15841590
total_checksum_failures+=checksum_failures;
15851591

‎src/backend/storage/page/bufpage.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include"access/htup_details.h"
1818
#include"access/itup.h"
1919
#include"access/xlog.h"
20+
#include"pgstat.h"
2021
#include"storage/checksum.h"
2122
#include"utils/memdebug.h"
2223
#include"utils/memutils.h"
@@ -151,6 +152,8 @@ PageIsVerified(Page page, BlockNumber blkno)
151152
errmsg("page verification failed, calculated checksum %u but expected %u",
152153
checksum,p->pd_checksum)));
153154

155+
pgstat_report_checksum_failure();
156+
154157
if (header_sane&&ignore_checksum_failure)
155158
return true;
156159
}

‎src/backend/utils/adt/pgstatfuncs.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,21 @@ pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS)
14971497
PG_RETURN_INT64(result);
14981498
}
14991499

1500+
Datum
1501+
pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS)
1502+
{
1503+
Oiddbid=PG_GETARG_OID(0);
1504+
int64result;
1505+
PgStat_StatDBEntry*dbentry;
1506+
1507+
if ((dbentry=pgstat_fetch_stat_dbentry(dbid))==NULL)
1508+
result=0;
1509+
else
1510+
result= (int64) (dbentry->n_checksum_failures);
1511+
1512+
PG_RETURN_INT64(result);
1513+
}
1514+
15001515
Datum
15011516
pg_stat_get_db_blk_read_time(PG_FUNCTION_ARGS)
15021517
{

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201903063
56+
#defineCATALOG_VERSION_NO201903091
5757

5858
#endif

‎src/include/catalog/pg_proc.dat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5227,6 +5227,10 @@
52275227
proname => 'pg_stat_get_db_deadlocks', provolatile => 's', proparallel => 'r',
52285228
prorettype => 'int8', proargtypes => 'oid',
52295229
prosrc => 'pg_stat_get_db_deadlocks' },
5230+
{ oid => '3426', descr => 'statistics: checksum failures detected in database',
5231+
proname => 'pg_stat_get_db_checksum_failures', provolatile => 's', proparallel => 'r',
5232+
prorettype => 'int8', proargtypes => 'oid',
5233+
prosrc => 'pg_stat_get_db_checksum_failures' },
52305234
{ oid => '3074', descr => 'statistics: last reset for a database',
52315235
proname => 'pg_stat_get_db_stat_reset_time', provolatile => 's',
52325236
proparallel => 'r', prorettype => 'timestamptz', proargtypes => 'oid',

‎src/include/pgstat.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ typedef enum StatMsgType
6464
PGSTAT_MTYPE_FUNCPURGE,
6565
PGSTAT_MTYPE_RECOVERYCONFLICT,
6666
PGSTAT_MTYPE_TEMPFILE,
67-
PGSTAT_MTYPE_DEADLOCK
67+
PGSTAT_MTYPE_DEADLOCK,
68+
PGSTAT_MTYPE_CHECKSUMFAILURE
6869
}StatMsgType;
6970

7071
/* ----------
@@ -530,6 +531,18 @@ typedef struct PgStat_MsgDeadlock
530531
Oidm_databaseid;
531532
}PgStat_MsgDeadlock;
532533

534+
/* ----------
535+
* PgStat_MsgChecksumFailure    Sent by the backend to tell the collector
536+
*about checksum failures noticed.
537+
* ----------
538+
*/
539+
typedefstructPgStat_MsgChecksumFailure
540+
{
541+
PgStat_MsgHdrm_hdr;
542+
Oidm_databaseid;
543+
intm_failurecount;
544+
}PgStat_MsgChecksumFailure;
545+
533546

534547
/* ----------
535548
* PgStat_MsgUnion over all possible messages.
@@ -593,6 +606,7 @@ typedef struct PgStat_StatDBEntry
593606
PgStat_Countern_temp_files;
594607
PgStat_Countern_temp_bytes;
595608
PgStat_Countern_deadlocks;
609+
PgStat_Countern_checksum_failures;
596610
PgStat_Countern_block_read_time;/* times in microseconds */
597611
PgStat_Countern_block_write_time;
598612

@@ -1200,6 +1214,8 @@ extern void pgstat_report_analyze(Relation rel,
12001214

12011215
externvoidpgstat_report_recovery_conflict(intreason);
12021216
externvoidpgstat_report_deadlock(void);
1217+
externvoidpgstat_report_checksum_failures_in_db(Oiddboid,intfailurecount);
1218+
externvoidpgstat_report_checksum_failure(void);
12031219

12041220
externvoidpgstat_initialize(void);
12051221
externvoidpgstat_bestart(void);

‎src/test/regress/expected/rules.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,7 @@ pg_stat_database| SELECT d.oid AS datid,
18171817
pg_stat_get_db_temp_files(d.oid) AS temp_files,
18181818
pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes,
18191819
pg_stat_get_db_deadlocks(d.oid) AS deadlocks,
1820+
pg_stat_get_db_checksum_failures(d.oid) AS checksum_failures,
18201821
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
18211822
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
18221823
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp