Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0446d3b

Browse files
committed
Backpatch 031_recovery_conflict.pl.
The prior commit showed that the introduction of recovery conflict tests was agood idea. Without these tests it's hard to know that the fix didn't breaksomething...031_recovery_conflict.pl was introduced in9f8a050 and extended in21e1844.Discussion:https://postgr.es/m/20220413002626.udl7lll7f3o7nre7@alap3.anarazel.deBackpatch: 10-14
1 parent57c5ad1 commit0446d3b

File tree

1 file changed

+371
-0
lines changed

1 file changed

+371
-0
lines changed
Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
# Copyright (c) 2021-2022, PostgreSQL Global Development Group
2+
3+
# Test that connections to a hot standby are correctly canceled when a
4+
# recovery conflict is detected Also, test that statistics in
5+
# pg_stat_database_conflicts are populated correctly
6+
7+
use strict;
8+
use warnings;
9+
use PostgreSQL::Test::Cluster;
10+
use PostgreSQL::Test::Utils;
11+
use Test::More;
12+
13+
14+
# Set up nodes
15+
my$node_primary = PostgreSQL::Test::Cluster->new('primary');
16+
$node_primary->init(allows_streaming=> 1);
17+
18+
my$tablespace1 ="test_recovery_conflict_tblspc";
19+
20+
$node_primary->append_conf(
21+
'postgresql.conf',qq[
22+
23+
# Doesn't currently exist pre 15, but might be backpatched later
24+
#allow_in_place_tablespaces = on
25+
#temp_tablespaces =$tablespace1
26+
27+
log_temp_files = 0
28+
29+
# for deadlock test
30+
max_prepared_transactions = 10
31+
32+
# wait some to test the wait paths as well, but not long for obvious reasons
33+
max_standby_streaming_delay = 50ms
34+
35+
# Some of the recovery conflict logging code only gets exercised after
36+
# deadlock_timeout. The test doesn't rely on that additional output, but it's
37+
# nice to get some minimal coverage of that code.
38+
#log_recovery_conflict_waits = on # doesn't exist < 14.
39+
deadlock_timeout = 10ms
40+
]);
41+
$node_primary->start;
42+
43+
my$backup_name ='my_backup';
44+
45+
# See allow_in_place_tablespaces comment above
46+
#$node_primary->safe_psql('postgres',
47+
#qq[CREATE TABLESPACE $tablespace1 LOCATION '']);
48+
49+
$node_primary->backup($backup_name);
50+
my$node_standby = PostgreSQL::Test::Cluster->new('standby');
51+
$node_standby->init_from_backup($node_primary,$backup_name,
52+
has_streaming=> 1);
53+
54+
$node_standby->start;
55+
56+
my$test_db ="test_db";
57+
58+
# use a new database, to trigger database recovery conflict
59+
$node_primary->safe_psql('postgres',"CREATE DATABASE$test_db");
60+
61+
# test schema / data
62+
my$table1 ="test_recovery_conflict_table1";
63+
my$table2 ="test_recovery_conflict_table2";
64+
$node_primary->safe_psql(
65+
$test_db,qq[
66+
CREATE TABLE${table1}(a int, b int);
67+
INSERT INTO$table1 SELECT i % 3, 0 FROM generate_series(1,20) i;
68+
CREATE TABLE${table2}(a int, b int);
69+
]);
70+
my$primary_lsn =$node_primary->lsn('flush');
71+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
72+
73+
74+
# a longrunning psql that we can use to trigger conflicts
75+
my$psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
76+
my%psql_standby = ('stdin'=>'','stdout'=>'');
77+
$psql_standby{run} =
78+
$node_standby->background_psql($test_db, \$psql_standby{stdin},
79+
\$psql_standby{stdout},
80+
$psql_timeout);
81+
$psql_standby{stdout} ='';
82+
83+
my$expected_conflicts = 0;
84+
85+
86+
## RECOVERY CONFLICT 1: Buffer pin conflict
87+
my$sect ="buffer pin conflict";
88+
$expected_conflicts++;
89+
90+
# Aborted INSERT on primary that will be cleaned up by vacuum. Has to be old
91+
# enough so that there's not a snapshot conflict before the buffer pin
92+
# conflict.
93+
94+
$node_primary->safe_psql(
95+
$test_db,
96+
qq[
97+
BEGIN;
98+
INSERT INTO$table1 VALUES (1,0);
99+
ROLLBACK;
100+
-- ensure flush, rollback doesn't do so
101+
BEGIN; LOCK$table1; COMMIT;
102+
]);
103+
104+
$primary_lsn =$node_primary->lsn('flush');
105+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
106+
107+
my$cursor1 ="test_recovery_conflict_cursor";
108+
109+
# DECLARE and use a cursor on standby, causing buffer with the only block of
110+
# the relation to be pinned on the standby
111+
$psql_standby{stdin} .=qq[
112+
BEGIN;
113+
DECLARE$cursor1 CURSOR FOR SELECT b FROM$table1;
114+
FETCH FORWARD FROM$cursor1;
115+
];
116+
# FETCH FORWARD should have returned a 0 since all values of b in the table
117+
# are 0
118+
ok(pump_until_standby(qr/^0$/m),
119+
"$sect: cursor with conflicting pin established");
120+
121+
# to check the log starting now for recovery conflict messages
122+
my$log_location =-s$node_standby->logfile;
123+
124+
# VACUUM on the primary
125+
$node_primary->safe_psql($test_db,qq[VACUUM$table1;]);
126+
127+
# Wait for catchup. Existing connection will be terminated before replay is
128+
# finished, so waiting for catchup ensures that there is no race between
129+
# encountering the recovery conflict which causes the disconnect and checking
130+
# the logfile for the terminated connection.
131+
$primary_lsn =$node_primary->lsn('flush');
132+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
133+
134+
check_conflict_log("User was holding shared buffer pin for too long");
135+
reconnect_and_clear();
136+
check_conflict_stat("bufferpin");
137+
138+
139+
## RECOVERY CONFLICT 2: Snapshot conflict
140+
$sect ="snapshot conflict";
141+
$expected_conflicts++;
142+
143+
$node_primary->safe_psql($test_db,
144+
qq[INSERT INTO$table1 SELECT i, 0 FROM generate_series(1,20) i]);
145+
$primary_lsn =$node_primary->lsn('flush');
146+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
147+
148+
# DECLARE and FETCH from cursor on the standby
149+
$psql_standby{stdin} .=qq[
150+
BEGIN;
151+
DECLARE$cursor1 CURSOR FOR SELECT b FROM$table1;
152+
FETCH FORWARD FROM$cursor1;
153+
];
154+
ok( pump_until(
155+
$psql_standby{run},$psql_timeout,
156+
\$psql_standby{stdout},qr/^0$/m,),
157+
"$sect: cursor with conflicting snapshot established");
158+
159+
# Do some HOT updates
160+
$node_primary->safe_psql($test_db,
161+
qq[UPDATE$table1 SET a = a + 1 WHERE a > 2;]);
162+
163+
# VACUUM, pruning those dead tuples
164+
$node_primary->safe_psql($test_db,qq[VACUUM$table1;]);
165+
166+
# Wait for attempted replay of PRUNE records
167+
$primary_lsn =$node_primary->lsn('flush');
168+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
169+
170+
check_conflict_log(
171+
"User query might have needed to see row versions that must be removed");
172+
reconnect_and_clear();
173+
check_conflict_stat("snapshot");
174+
175+
176+
## RECOVERY CONFLICT 3: Lock conflict
177+
$sect ="lock conflict";
178+
$expected_conflicts++;
179+
180+
# acquire lock to conflict with
181+
$psql_standby{stdin} .=qq[
182+
BEGIN;
183+
LOCK TABLE$table1 IN ACCESS SHARE MODE;
184+
SELECT 1;
185+
];
186+
ok(pump_until_standby(qr/^1$/m),"$sect: conflicting lock acquired");
187+
188+
# DROP TABLE containing block which standby has in a pinned buffer
189+
$node_primary->safe_psql($test_db,qq[DROP TABLE$table1;]);
190+
191+
$primary_lsn =$node_primary->lsn('flush');
192+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
193+
194+
check_conflict_log("User was holding a relation lock for too long");
195+
reconnect_and_clear();
196+
check_conflict_stat("lock");
197+
198+
199+
# See allow_in_place_tablespaces comment above
200+
### RECOVERY CONFLICT 4: Tablespace conflict
201+
#$sect = "tablespace conflict";
202+
#$expected_conflicts++;
203+
#
204+
## DECLARE a cursor for a query which, with sufficiently low work_mem, will
205+
## spill tuples into temp files in the temporary tablespace created during
206+
## setup.
207+
#$psql_standby{stdin} .= qq[
208+
# BEGIN;
209+
# SET work_mem = '64kB';
210+
# DECLARE $cursor1 CURSOR FOR
211+
# SELECT count(*) FROM generate_series(1,6000);
212+
# FETCH FORWARD FROM $cursor1;
213+
# ];
214+
#ok(pump_until_standby(qr/^6000$/m),
215+
#"$sect: cursor with conflicting temp file established");
216+
#
217+
## Drop the tablespace currently containing spill files for the query on the
218+
## standby
219+
#$node_primary->safe_psql($test_db, qq[DROP TABLESPACE $tablespace1;]);
220+
#
221+
#$primary_lsn = $node_primary->lsn('flush');
222+
#$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
223+
#
224+
#check_conflict_log(
225+
#"User was or might have been using tablespace that must be dropped");
226+
#reconnect_and_clear();
227+
#check_conflict_stat("tablespace");
228+
229+
230+
## RECOVERY CONFLICT 5: Deadlock
231+
$sect ="startup deadlock";
232+
$expected_conflicts++;
233+
234+
# Generate a few dead rows, to later be cleaned up by vacuum. Then acquire a
235+
# lock on another relation in a prepared xact, so it's held continuously by
236+
# the startup process. The standby psql will block acquiring that lock while
237+
# holding a pin that vacuum needs, triggering the deadlock.
238+
$node_primary->safe_psql(
239+
$test_db,
240+
qq[
241+
CREATE TABLE$table1(a int, b int);
242+
INSERT INTO$table1 VALUES (1);
243+
BEGIN;
244+
INSERT INTO$table1(a) SELECT generate_series(1, 100) i;
245+
ROLLBACK;
246+
BEGIN;
247+
LOCK TABLE$table2;
248+
PREPARE TRANSACTION 'lock';
249+
INSERT INTO$table1(a) VALUES (170);
250+
SELECT txid_current();
251+
]);
252+
253+
$primary_lsn =$node_primary->lsn('flush');
254+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
255+
256+
$psql_standby{stdin} .=qq[
257+
BEGIN;
258+
-- hold pin
259+
DECLARE$cursor1 CURSOR FOR SELECT a FROM$table1;
260+
FETCH FORWARD FROM$cursor1;
261+
-- wait for lock held by prepared transaction
262+
SELECT * FROM$table2;
263+
];
264+
ok( pump_until(
265+
$psql_standby{run},$psql_timeout,
266+
\$psql_standby{stdout},qr/^1$/m,),
267+
"$sect: cursor holding conflicting pin, also waiting for lock, established"
268+
);
269+
270+
# just to make sure we're waiting for lock already
271+
ok($node_standby->poll_query_until(
272+
'postgres',qq[
273+
SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' AND NOT granted;
274+
],'waiting'),
275+
"$sect: lock acquisition is waiting");
276+
277+
# VACUUM will prune away rows, causing a buffer pin conflict, while standby
278+
# psql is waiting on lock
279+
$node_primary->safe_psql($test_db,qq[VACUUM$table1;]);
280+
$primary_lsn =$node_primary->lsn('flush');
281+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
282+
283+
check_conflict_log("User transaction caused buffer deadlock with recovery.");
284+
reconnect_and_clear();
285+
check_conflict_stat("deadlock");
286+
287+
# clean up for next tests
288+
$node_primary->safe_psql($test_db,qq[ROLLBACK PREPARED 'lock';]);
289+
290+
291+
# Check that expected number of conflicts show in pg_stat_database. Needs to
292+
# be tested before database is dropped, for obvious reasons.
293+
is($node_standby->safe_psql(
294+
$test_db,
295+
qq[SELECT conflicts FROM pg_stat_database WHERE datname='$test_db';]),
296+
$expected_conflicts,
297+
qq[$expected_conflicts recovery conflicts shown in pg_stat_database]);
298+
299+
300+
## RECOVERY CONFLICT 6: Database conflict
301+
$sect ="database conflict";
302+
303+
$node_primary->safe_psql('postgres',qq[DROP DATABASE$test_db;]);
304+
305+
$primary_lsn =$node_primary->lsn('flush');
306+
$node_primary->wait_for_catchup($node_standby,'replay',$primary_lsn);
307+
308+
check_conflict_log("User was connected to a database that must be dropped");
309+
310+
311+
# explicitly shut down psql instances gracefully - to avoid hangs or worse on
312+
# windows
313+
$psql_standby{stdin} .="\\q\n";
314+
$psql_standby{run}->finish;
315+
316+
$node_standby->stop();
317+
$node_primary->stop();
318+
319+
320+
done_testing();
321+
322+
323+
subpump_until_standby
324+
{
325+
my$match =shift;
326+
327+
return pump_until($psql_standby{run},$psql_timeout,
328+
\$psql_standby{stdout},$match);
329+
}
330+
331+
subreconnect_and_clear
332+
{
333+
# If psql isn't dead already, tell it to quit as \q, when already dead,
334+
# causes IPC::Run to unhelpfully error out with "ack Broken pipe:".
335+
$psql_standby{run}->pump_nb();
336+
if ($psql_standby{run}->pumpable())
337+
{
338+
$psql_standby{stdin} .="\\q\n";
339+
}
340+
$psql_standby{run}->finish;
341+
342+
# restart
343+
$psql_standby{run}->run();
344+
$psql_standby{stdin} ='';
345+
$psql_standby{stdout} ='';
346+
347+
# Run query to ensure connection has finished re-establishing
348+
$psql_standby{stdin} .=qq[SELECT 1;\n];
349+
dieunless pump_until_standby(qr/^1$/m);
350+
$psql_standby{stdout} ='';
351+
}
352+
353+
subcheck_conflict_log
354+
{
355+
my$message =shift;
356+
my$old_log_location =$log_location;
357+
358+
$log_location =$node_standby->wait_for_log(qr/$message/,$log_location);
359+
360+
cmp_ok($log_location,'>',$old_log_location,
361+
"$sect: logfile contains terminated connection due to recovery conflict"
362+
);
363+
}
364+
365+
subcheck_conflict_stat
366+
{
367+
# Stats can't easily be checked before 15, requires waiting for stats to
368+
# be reported to stats collector and then those messages need to be
369+
# processed. Dealt with here to reduce intra-branch difference in the
370+
# tests.
371+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp