Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 42496cb

Browse files
committed
amcheck: Distinguish interrupted page deletion from corruption.
This prevents false-positive reports about "the first child of leftmost target page is not leftmost of its level", "block %u is not leftmost" and "left link/right link pair". They appeared if amcheck ran before VACUUM cleaned things, after a cluster exited recovery between the first-stage and second-stage WAL records of a deletion. Back-patch to v11 (all supported versions).
Reviewed by Peter Geoghegan.
Discussion: https://postgr.es/m/20231005025232.c7.nmisch@google.com
1 parent 162b38a commit 42496cb

File tree

2 files changed

+168
-4
lines changed

2 files changed

+168
-4
lines changed

‎contrib/amcheck/t/005_pitr.pl

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) 2021-2023, PostgreSQL Global Development Group
2+
3+
# Test integrity of intermediate states by PITR to those states
4+
use strict;
5+
use warnings;
6+
use PostgreSQL::Test::Cluster;
7+
use PostgreSQL::Test::Utils;
8+
use Test::More;
9+
10+
# origin node: generate WAL records of interest.
my $origin = PostgreSQL::Test::Cluster->new('origin');
$origin->init(has_archiving => 1, allows_streaming => 1);
$origin->append_conf('postgresql.conf', 'autovacuum = off');
$origin->start;
$origin->backup('my_backup');

# Create a table with each of 6 PK values spanning 1/4 of a block.  Delete the
# first four, so one index leaf is eligible for deletion.  Make a replication
# slot just so pg_waldump will always have access to later WAL.
my $setup = <<EOSQL;
BEGIN;
CREATE EXTENSION amcheck;
CREATE TABLE not_leftmost (c text);
ALTER TABLE not_leftmost ALTER c SET STORAGE PLAIN;
INSERT INTO not_leftmost
SELECT repeat(n::text, database_block_size / 4)
FROM generate_series(1,6) t(n), pg_control_init();
ALTER TABLE not_leftmost ADD CONSTRAINT not_leftmost_pk PRIMARY KEY (c);
DELETE FROM not_leftmost WHERE c ~ '^[1-4]';
SELECT pg_create_physical_replication_slot('for_waldump', true, false);
COMMIT;
EOSQL
$origin->safe_psql('postgres', $setup);

# Remember the WAL segment in use before VACUUM; pg_waldump starts there.
my $before_vacuum_walfile =
  $origin->safe_psql('postgres',
	"SELECT pg_walfile_name(pg_current_wal_lsn())");

# VACUUM to delete the aforementioned leaf page.  Force an XLogFlush() by
# dropping a permanent table.  That way, the XLogReader infrastructure can
# always see VACUUM's records, even under synchronous_commit=off.  Finally,
# find the LSN of that VACUUM's last UNLINK_PAGE record.
my $vacuum = <<EOSQL;
SET synchronous_commit = off;
VACUUM (VERBOSE, INDEX_CLEANUP ON) not_leftmost;
CREATE TABLE XLogFlush ();
DROP TABLE XLogFlush;
SELECT pg_walfile_name(pg_current_wal_flush_lsn());
EOSQL
my $after_unlink_walfile = $origin->safe_psql('postgres', $vacuum);
$origin->stop;

# Scan the WAL written between the two remembered segments and pull out the
# LSN of the first Btree UNLINK_PAGE record that pg_waldump prints.
my $unlink_lsn = do
{
	# Run pg_waldump with the environment of the origin node.
	local %ENV = $origin->_get_env();
	my $stdout;

	# NOTE(review): the result of run_log() is intentionally left unchecked,
	# as in the original; presumably pg_waldump may exit nonzero once it runs
	# off the end of valid WAL -- confirm before adding a check.
	run_log(
		[
			'pg_waldump', '-p',
			$origin->data_dir . '/pg_wal',
			$before_vacuum_walfile, $after_unlink_walfile
		],
		'>',
		\$stdout);

	# Capture via a list-context match rather than reading $1 afterwards:
	# when the pattern does not match, $lsn is reliably undef, whereas $1
	# would keep whatever an earlier successful match had left in it.
	my ($lsn) = $stdout =~
	  m|^rmgr: Btree .*, lsn: ([/0-9A-F]+), .*, desc: UNLINK_PAGE left|m;
	$lsn;
};
die "did not find UNLINK_PAGE record" unless $unlink_lsn;

# replica node: amcheck at notable points in the WAL stream
my $replica = PostgreSQL::Test::Cluster->new('replica');
$replica->init_from_backup($origin, 'my_backup', has_restoring => 1);

# Stop recovery just before the UNLINK_PAGE record (target is exclusive) and
# promote, so the index is left with the page deletion only half done.
$replica->append_conf('postgresql.conf',
	"recovery_target_lsn = '$unlink_lsn'");
$replica->append_conf('postgresql.conf', 'recovery_target_inclusive = off');
$replica->append_conf('postgresql.conf', 'recovery_target_action = promote');
$replica->start;
$replica->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
  or die "Timed out while waiting for PITR promotion";

# recovery done; run amcheck
my $debug = "SET client_min_messages = 'debug1'";
my ($rc, $stderr);

# bt_index_parent_check must succeed and report the interrupted deletion at
# DEBUG1 level.
$rc = $replica->psql(
	'postgres',
	"$debug; SELECT bt_index_parent_check('not_leftmost_pk', true)",
	stderr => \$stderr);
print STDERR $stderr, "\n";
is($rc, 0, "bt_index_parent_check passes");
like(
	$stderr,
	qr/interrupted page deletion detected/,
	"bt_index_parent_check: interrupted page deletion detected");

# bt_index_check must succeed as well.
$rc = $replica->psql(
	'postgres',
	"$debug; SELECT bt_index_check('not_leftmost_pk', true)",
	stderr => \$stderr);
print STDERR $stderr, "\n";
is($rc, 0, "bt_index_check passes");

done_testing();

‎contrib/amcheck/verify_nbtree.c

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
146146
boolrootdescend);
147147
staticBtreeLevelbt_check_level_from_leftmost(BtreeCheckState*state,
148148
BtreeLevellevel);
149+
staticboolbt_leftmost_ignoring_half_dead(BtreeCheckState*state,
150+
BlockNumberstart,
151+
BTPageOpaquestart_opaque);
149152
staticvoidbt_recheck_sibling_links(BtreeCheckState*state,
150153
BlockNumberbtpo_prev_from_target,
151154
BlockNumberleftcurrent);
@@ -774,7 +777,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
774777
*/
775778
if (state->readonly)
776779
{
777-
if (!P_LEFTMOST(opaque))
780+
if (!bt_leftmost_ignoring_half_dead(state,current,opaque))
778781
ereport(ERROR,
779782
(errcode(ERRCODE_INDEX_CORRUPTED),
780783
errmsg("block %u is not leftmost in index \"%s\"",
@@ -828,8 +831,16 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
828831
*/
829832
}
830833

831-
/* Sibling links should be in mutual agreement */
832-
if (opaque->btpo_prev!=leftcurrent)
834+
/*
835+
* Sibling links should be in mutual agreement. There arises
836+
* leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
837+
* of the parent's low-key downlink is half-dead. (A half-dead page
838+
* has no downlink from its parent.) Under heavyweight locking, the
839+
* last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
840+
* Without heavyweight locking, validation of the P_NONE case remains
841+
* unimplemented.
842+
*/
843+
if (opaque->btpo_prev!=leftcurrent&&leftcurrent!=P_NONE)
833844
bt_recheck_sibling_links(state,opaque->btpo_prev,leftcurrent);
834845

835846
/* Check level */
@@ -910,6 +921,66 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
910921
returnnextleveldown;
911922
}
912923

924+
/*
925+
* Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
926+
* half-dead, sibling-linked pages to the left. If a half-dead page appears
927+
* under state->readonly, the database exited recovery between the first-stage
928+
* and second-stage WAL records of a deletion.
929+
*/
930+
staticbool
931+
bt_leftmost_ignoring_half_dead(BtreeCheckState*state,
932+
BlockNumberstart,
933+
BTPageOpaquestart_opaque)
934+
{
935+
BlockNumberreached=start_opaque->btpo_prev,
936+
reached_from=start;
937+
boolall_half_dead= true;
938+
939+
/*
940+
* To handle the !readonly case, we'd need to accept BTP_DELETED pages and
941+
* potentially observe nbtree/README "Page deletion and backwards scans".
942+
*/
943+
Assert(state->readonly);
944+
945+
while (reached!=P_NONE&&all_half_dead)
946+
{
947+
Pagepage=palloc_btree_page(state,reached);
948+
BTPageOpaquereached_opaque= (BTPageOpaque)PageGetSpecialPointer(page);
949+
950+
CHECK_FOR_INTERRUPTS();
951+
952+
/*
953+
* Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
954+
* writes that side-links will continue to point to the siblings.
955+
* Check btpo_next for that property.
956+
*/
957+
all_half_dead=P_ISHALFDEAD(reached_opaque)&&
958+
reached!=start&&
959+
reached!=reached_from&&
960+
reached_opaque->btpo_next==reached_from;
961+
if (all_half_dead)
962+
{
963+
XLogRecPtrpagelsn=PageGetLSN(page);
964+
965+
/* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
966+
ereport(DEBUG1,
967+
(errcode(ERRCODE_NO_DATA),
968+
errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
969+
RelationGetRelationName(state->rel)),
970+
errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
971+
reached,reached_from,
972+
LSN_FORMAT_ARGS(pagelsn))));
973+
974+
reached_from=reached;
975+
reached=reached_opaque->btpo_prev;
976+
}
977+
978+
pfree(page);
979+
}
980+
981+
returnall_half_dead;
982+
}
983+
913984
/*
914985
* Raise an error when target page's left link does not point back to the
915986
* previous target page, called leftcurrent here. The leftcurrent page's
@@ -950,6 +1021,9 @@ bt_recheck_sibling_links(BtreeCheckState *state,
9501021
BlockNumberbtpo_prev_from_target,
9511022
BlockNumberleftcurrent)
9521023
{
1024+
/* taking BTPageOpaque from metapage would give irrelevant findings */
1025+
Assert(leftcurrent!=P_NONE);
1026+
9531027
if (!state->readonly)
9541028
{
9551029
Bufferlbuf;
@@ -1933,7 +2007,8 @@ bt_child_highkey_check(BtreeCheckState *state,
19332007
opaque= (BTPageOpaque)PageGetSpecialPointer(page);
19342008

19352009
/* The first page we visit at the level should be leftmost */
1936-
if (first&& !BlockNumberIsValid(state->prevrightlink)&& !P_LEFTMOST(opaque))
2010+
if (first&& !BlockNumberIsValid(state->prevrightlink)&&
2011+
!bt_leftmost_ignoring_half_dead(state,blkno,opaque))
19372012
ereport(ERROR,
19382013
(errcode(ERRCODE_INDEX_CORRUPTED),
19392014
errmsg("the first child of leftmost target page is not leftmost of its level in index \"%s\"",

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp