Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite2f65f4

Browse files
committed
Fix old-fd issues using global barriers everywhere.
Commits4eb2176 andb74e94d introduced a way to force every backend toclose all relation files, to fix an ancient Windows-only bug.This commit extends that behavior to all operating systems and addsa couple of extra barrier points, to fix a totally different class ofbug: the reuse of relfilenodes in scenarios that have no other kind ofcache invalidation to prevent file descriptor mix-ups.In all releases, data corruption could occur when you moved a databaseto another tablespace and then back again. Despite that, no back-patchfor now as the infrastructure required is too new and invasive. Inmaster only, since commitaa01051, it could also happen when usingCREATE DATABASE with a user-supplied OID or via pg_upgrade.Author: Andres Freund <andres@anarazel.de>Reviewed-by: Robert Haas <robertmhaas@gmail.com>Reviewed-by: Thomas Munro <thomas.munro@gmail.com>Discussion:https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de
1 parentb74e94d commite2f65f4

File tree

5 files changed

+241
-25
lines changed

5 files changed

+241
-25
lines changed

‎src/backend/commands/dbcommands.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,10 +1687,8 @@ dropdb(const char *dbname, bool missing_ok, bool force)
16871687
*/
16881688
RequestCheckpoint(CHECKPOINT_IMMEDIATE |CHECKPOINT_FORCE |CHECKPOINT_WAIT);
16891689

1690-
#if defined(USE_BARRIER_SMGRRELEASE)
16911690
/* Close all smgr fds in all backends. */
16921691
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1693-
#endif
16941692

16951693
/*
16961694
* Remove all tablespace subdirs belonging to the database.
@@ -1940,10 +1938,8 @@ movedb(const char *dbname, const char *tblspcname)
19401938
RequestCheckpoint(CHECKPOINT_IMMEDIATE |CHECKPOINT_FORCE |CHECKPOINT_WAIT
19411939
|CHECKPOINT_FLUSH_ALL);
19421940

1943-
#if defined(USE_BARRIER_SMGRRELEASE)
19441941
/* Close all smgr fds in all backends. */
19451942
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1946-
#endif
19471943

19481944
/*
19491945
* Now drop all buffers holding data of the target database; they should
@@ -3054,6 +3050,9 @@ dbase_redo(XLogReaderState *record)
30543050
*/
30553051
FlushDatabaseBuffers(xlrec->src_db_id);
30563052

3053+
/* Close all sgmr fds in all backends. */
3054+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
3055+
30573056
/*
30583057
* Copy this subdirectory to the new location
30593058
*
@@ -3111,10 +3110,8 @@ dbase_redo(XLogReaderState *record)
31113110
/* Clean out the xlog relcache too */
31123111
XLogDropDatabase(xlrec->db_id);
31133112

3114-
#if defined(USE_BARRIER_SMGRRELEASE)
31153113
/* Close all sgmr fds in all backends. */
31163114
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
3117-
#endif
31183115

31193116
for (i=0;i<xlrec->ntablespaces;i++)
31203117
{

‎src/backend/commands/tablespace.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,10 @@ DropTableSpace(DropTableSpaceStmt *stmt)
548548
* use a global barrier to ask all backends to close all files, and
549549
* wait until they're finished.
550550
*/
551-
#if defined(USE_BARRIER_SMGRRELEASE)
552551
LWLockRelease(TablespaceCreateLock);
553552
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
554553
LWLockAcquire(TablespaceCreateLock,LW_EXCLUSIVE);
555-
#endif
554+
556555
/* And now try again. */
557556
if (!destroy_tablespace_directories(tablespaceoid, false))
558557
{
@@ -1574,6 +1573,9 @@ tblspc_redo(XLogReaderState *record)
15741573
{
15751574
xl_tblspc_drop_rec*xlrec= (xl_tblspc_drop_rec*)XLogRecGetData(record);
15761575

1576+
/* Close all smgr fds in all backends. */
1577+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1578+
15771579
/*
15781580
* If we issued a WAL record for a drop tablespace it implies that
15791581
* there were no files in it at all when the DROP was done. That means
@@ -1591,11 +1593,6 @@ tblspc_redo(XLogReaderState *record)
15911593
*/
15921594
if (!destroy_tablespace_directories(xlrec->ts_id, true))
15931595
{
1594-
#if defined(USE_BARRIER_SMGRRELEASE)
1595-
/* Close all smgr fds in all backends. */
1596-
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1597-
#endif
1598-
15991596
ResolveRecoveryConflictWithTablespace(xlrec->ts_id);
16001597

16011598
/*

‎src/include/pg_config_manual.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -152,17 +152,6 @@
152152
#defineEXEC_BACKEND
153153
#endif
154154

155-
/*
156-
* If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink
157-
* directories will ask other backends to close all smgr file descriptors.
158-
* This is enabled on Windows, because otherwise unlinked but still open files
159-
* can prevent rmdir(containing_directory) from succeeding. On other
160-
* platforms, it can be defined to exercise those code paths.
161-
*/
162-
#if defined(WIN32)
163-
#defineUSE_BARRIER_SMGRRELEASE
164-
#endif
165-
166155
/*
167156
* Define this if your operating system supports link()
168157
*/

‎src/test/recovery/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#
1010
#-------------------------------------------------------------------------
1111

12-
EXTRA_INSTALL=contrib/test_decoding
12+
EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm
1313

1414
subdir = src/test/recovery
1515
top_builddir = ../../..
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
use strict;
2+
use warnings;
3+
use PostgreSQL::Test::Cluster;
4+
use PostgreSQL::Test::Utils;
5+
use Test::More;
6+
use File::Basename;
7+
8+
9+
my$node_primary = PostgreSQL::Test::Cluster->new('primary');
10+
$node_primary->init(allows_streaming=> 1);
11+
$node_primary->append_conf('postgresql.conf',q[
12+
allow_in_place_tablespaces = true
13+
log_connections=on
14+
# to avoid "repairing" corruption
15+
full_page_writes=off
16+
log_min_messages=debug2
17+
autovacuum_naptime=1s
18+
shared_buffers=1MB
19+
]);
20+
$node_primary->start;
21+
22+
23+
# Create streaming standby linking to primary
24+
my$backup_name ='my_backup';
25+
$node_primary->backup($backup_name);
26+
my$node_standby = PostgreSQL::Test::Cluster->new('standby');
27+
$node_standby->init_from_backup($node_primary,$backup_name,
28+
has_streaming=> 1);
29+
$node_standby->start;
30+
31+
# To avoid hanging while expecting some specific input from a psql
32+
# instance being driven by us, add a timeout high enough that it
33+
# should never trigger even on very slow machines, unless something
34+
# is really wrong.
35+
my$psql_timeout = IPC::Run::timer(300);
36+
37+
my%psql_primary = (stdin=>'',stdout=>'',stderr=>'');
38+
$psql_primary{run} = IPC::Run::start(
39+
['psql','-XA','-f','-','-d',$node_primary->connstr('postgres') ],
40+
'<',
41+
\$psql_primary{stdin},
42+
'>',
43+
\$psql_primary{stdout},
44+
'2>',
45+
\$psql_primary{stderr},
46+
$psql_timeout);
47+
48+
my%psql_standby = ('stdin'=>'','stdout'=>'','stderr'=>'');
49+
$psql_standby{run} = IPC::Run::start(
50+
['psql','-XA','-f','-','-d',$node_standby->connstr('postgres') ],
51+
'<',
52+
\$psql_standby{stdin},
53+
'>',
54+
\$psql_standby{stdout},
55+
'2>',
56+
\$psql_standby{stderr},
57+
$psql_timeout);
58+
59+
60+
# Create template database with a table that we'll update, to trigger dirty
61+
# rows. Using a template database + preexisting rows makes it a bit easier to
62+
# reproduce, because there's no cache invalidations generated.
63+
64+
$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db_template OID = 50000;");
65+
$node_primary->safe_psql('conflict_db_template',q[
66+
CREATE TABLE large(id serial primary key, dataa text, datab text);
67+
INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]);
68+
$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
69+
70+
$node_primary->safe_psql('postgres',q[
71+
CREATE EXTENSION pg_prewarm;
72+
CREATE TABLE replace_sb(data text);
73+
INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]);
74+
75+
# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files
76+
send_query_and_wait(
77+
\%psql_primary,
78+
q[BEGIN;],
79+
qr/BEGIN/m);
80+
send_query_and_wait(
81+
\%psql_standby,
82+
q[BEGIN;],
83+
qr/BEGIN/m);
84+
85+
# Cause lots of dirty rows in shared_buffers
86+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 1;");
87+
88+
# Now do a bunch of work in another database. That will end up needing to
89+
# write back dirty data from the previous step, opening the relevant file
90+
# descriptors
91+
cause_eviction(\%psql_primary, \%psql_standby);
92+
93+
# drop and recreate database
94+
$node_primary->safe_psql('postgres',"DROP DATABASE conflict_db;");
95+
$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
96+
97+
verify($node_primary,$node_standby, 1,
98+
"initial contents as expected");
99+
100+
# Again cause lots of dirty rows in shared_buffers, but use a different update
101+
# value so we can check everything is OK
102+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 2;");
103+
104+
# Again cause a lot of IO. That'll again write back dirty data, but uses (XXX
105+
# adjust after bugfix) the already opened file descriptor.
106+
# FIXME
107+
cause_eviction(\%psql_primary, \%psql_standby);
108+
109+
verify($node_primary,$node_standby, 2,
110+
"update to reused relfilenode (due to DB oid conflict) is not lost");
111+
112+
113+
$node_primary->safe_psql('conflict_db',"VACUUM FULL large;");
114+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 3;");
115+
116+
verify($node_primary,$node_standby, 3,
117+
"restored contents as expected");
118+
119+
# Test for old filehandles after moving a database in / out of tablespace
120+
$node_primary->safe_psql('postgres',q[CREATE TABLESPACE test_tablespace LOCATION '']);
121+
122+
# cause dirty buffers
123+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 4;");
124+
# cause files to be opened in backend in other database
125+
cause_eviction(\%psql_primary, \%psql_standby);
126+
127+
# move database back / forth
128+
$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
129+
$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE pg_default');
130+
131+
# cause dirty buffers
132+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 5;");
133+
cause_eviction(\%psql_primary, \%psql_standby);
134+
135+
verify($node_primary,$node_standby, 5,
136+
"post move contents as expected");
137+
138+
$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
139+
140+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 7;");
141+
cause_eviction(\%psql_primary, \%psql_standby);
142+
$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 8;");
143+
$node_primary->safe_psql('postgres','DROP DATABASE conflict_db');
144+
$node_primary->safe_psql('postgres','DROP TABLESPACE test_tablespace');
145+
146+
$node_primary->safe_psql('postgres','REINDEX TABLE pg_database');
147+
148+
149+
# explicitly shut down psql instances gracefully - to avoid hangs
150+
# or worse on windows
151+
$psql_primary{stdin} .="\\q\n";
152+
$psql_primary{run}->finish;
153+
$psql_standby{stdin} .="\\q\n";
154+
$psql_standby{run}->finish;
155+
156+
$node_primary->stop();
157+
$node_standby->stop();
158+
159+
# Make sure that there weren't crashes during shutdown
160+
161+
command_like(['pg_controldata',$node_primary->data_dir ],
162+
qr/Database cluster state:\s+shut down\n/,'primary shut down ok');
163+
command_like(['pg_controldata',$node_standby->data_dir ],
164+
qr/Database cluster state:\s+shut down in recovery\n/,'standby shut down ok');
165+
done_testing();
166+
167+
subverify
168+
{
169+
my ($primary,$standby,$counter,$message) =@_;
170+
171+
my$query ="SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10";
172+
is($primary->safe_psql('conflict_db',$query),
173+
"$counter|4000",
174+
"primary:$message");
175+
176+
$primary->wait_for_catchup($standby);
177+
is($standby->safe_psql('conflict_db',$query),
178+
"$counter|4000",
179+
"standby:$message");
180+
}
181+
182+
subcause_eviction
183+
{
184+
my ($psql_primary,$psql_standby) =@_;
185+
186+
send_query_and_wait(
187+
$psql_primary,
188+
q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
189+
qr/warmed_buffers/m);
190+
191+
send_query_and_wait(
192+
$psql_standby,
193+
q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
194+
qr/warmed_buffers/m);
195+
}
196+
197+
# Send query, wait until string matches
198+
subsend_query_and_wait
199+
{
200+
my ($psql,$query,$untl) =@_;
201+
my$ret;
202+
203+
# send query
204+
$$psql{stdin} .=$query;
205+
$$psql{stdin} .="\n";
206+
207+
# wait for query results
208+
$$psql{run}->pump_nb();
209+
while (1)
210+
{
211+
lastif$$psql{stdout} =~/$untl/;
212+
213+
if ($psql_timeout->is_expired)
214+
{
215+
BAIL_OUT("aborting wait: program timed out\n"
216+
."stream contents: >>$$psql{stdout}<<\n"
217+
."pattern searched for:$untl\n");
218+
return 0;
219+
}
220+
if (not$$psql{run}->pumpable())
221+
{
222+
BAIL_OUT("aborting wait: program died\n"
223+
."stream contents: >>$$psql{stdout}<<\n"
224+
."pattern searched for:$untl\n");
225+
return 0;
226+
}
227+
$$psql{run}->pump();
228+
}
229+
230+
$$psql{stdout} ='';
231+
232+
return 1;
233+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp