NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit e2f65f4

committed

Fix old-fd issues using global barriers everywhere.

Commits 4eb2176 and b74e94d introduced a way to force every backend to close all relation files, to fix an ancient Windows-only bug.

This commit extends that behavior to all operating systems and adds a couple of extra barrier points, to fix a totally different class of bug: the reuse of relfilenodes in scenarios that have no other kind of cache invalidation to prevent file descriptor mix-ups.

In all releases, data corruption could occur when you moved a database to another tablespace and then back again. Despite that, no back-patch for now as the infrastructure required is too new and invasive. In master only, since commit aa01051, it could also happen when using CREATE DATABASE with a user-supplied OID or via pg_upgrade.

Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de

1 parent b74e94d, commit e2f65f4 (Copy full SHA for e2f65f4)

File tree

5 files changed

+241

-25

lines changed

src
- backend/commands
  - dbcommands.c
  - tablespace.c
- include
  - pg_config_manual.h
- test/recovery
  - Makefile
  - t
    - 032_relfilenode_reuse.pl

5 files changed

+241

-25

lines changed

`‎src/backend/commands/dbcommands.c‎`

Lines changed: 3 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -1687,10 +1687,8 @@ dropdb(const char *dbname, bool missing_ok, bool force)`
`1687`	`1687`	`*/`
`1688`	`1688`	`RequestCheckpoint(CHECKPOINT_IMMEDIATE \|CHECKPOINT_FORCE \|CHECKPOINT_WAIT);`
`1689`	`1689`
`1690`		`-#if defined(USE_BARRIER_SMGRRELEASE)`
`1691`	`1690`	`/* Close all smgr fds in all backends. */`
`1692`	`1691`	`WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
`1693`		`-#endif`
`1694`	`1692`
`1695`	`1693`	`/*`
`1696`	`1694`	`* Remove all tablespace subdirs belonging to the database.`
`@@ -1940,10 +1938,8 @@ movedb(const char *dbname, const char *tblspcname)`
`1940`	`1938`	`RequestCheckpoint(CHECKPOINT_IMMEDIATE \|CHECKPOINT_FORCE \|CHECKPOINT_WAIT`
`1941`	`1939`	`\|CHECKPOINT_FLUSH_ALL);`
`1942`	`1940`
`1943`		`-#if defined(USE_BARRIER_SMGRRELEASE)`
`1944`	`1941`	`/* Close all smgr fds in all backends. */`
`1945`	`1942`	`WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
`1946`		`-#endif`
`1947`	`1943`
`1948`	`1944`	`/*`
`1949`	`1945`	`* Now drop all buffers holding data of the target database; they should`
`@@ -3054,6 +3050,9 @@ dbase_redo(XLogReaderState *record)`
`3054`	`3050`	`*/`
`3055`	`3051`	`FlushDatabaseBuffers(xlrec->src_db_id);`
`3056`	`3052`
	`3053`	`+/* Close all sgmr fds in all backends. */`
	`3054`	`+WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
	`3055`	`+`
`3057`	`3056`	`/*`
`3058`	`3057`	`* Copy this subdirectory to the new location`
`3059`	`3058`	`*`
`@@ -3111,10 +3110,8 @@ dbase_redo(XLogReaderState *record)`
`3111`	`3110`	`/* Clean out the xlog relcache too */`
`3112`	`3111`	`XLogDropDatabase(xlrec->db_id);`
`3113`	`3112`
`3114`		`-#if defined(USE_BARRIER_SMGRRELEASE)`
`3115`	`3113`	`/* Close all sgmr fds in all backends. */`
`3116`	`3114`	`WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
`3117`		`-#endif`
`3118`	`3115`
`3119`	`3116`	`for (i=0;i<xlrec->ntablespaces;i++)`
`3120`	`3117`	`{`

`‎src/backend/commands/tablespace.c‎`

Lines changed: 4 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -548,11 +548,10 @@ DropTableSpace(DropTableSpaceStmt *stmt)`
`548`	`548`	`* use a global barrier to ask all backends to close all files, and`
`549`	`549`	`* wait until they're finished.`
`550`	`550`	`*/`
`551`		`-#if defined(USE_BARRIER_SMGRRELEASE)`
`552`	`551`	`LWLockRelease(TablespaceCreateLock);`
`553`	`552`	`WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
`554`	`553`	`LWLockAcquire(TablespaceCreateLock,LW_EXCLUSIVE);`
`555`		`-#endif`
	`554`	`+`
`556`	`555`	`/* And now try again. */`
`557`	`556`	`if (!destroy_tablespace_directories(tablespaceoid, false))`
`558`	`557`	`{`
`@@ -1574,6 +1573,9 @@ tblspc_redo(XLogReaderState *record)`
`1574`	`1573`	`{`
`1575`	`1574`	`xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);`
`1576`	`1575`
	`1576`	`+/* Close all smgr fds in all backends. */`
	`1577`	`+WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
	`1578`	`+`
`1577`	`1579`	`/*`
`1578`	`1580`	`* If we issued a WAL record for a drop tablespace it implies that`
`1579`	`1581`	`* there were no files in it at all when the DROP was done. That means`
`@@ -1591,11 +1593,6 @@ tblspc_redo(XLogReaderState *record)`
`1591`	`1593`	`*/`
`1592`	`1594`	`if (!destroy_tablespace_directories(xlrec->ts_id, true))`
`1593`	`1595`	`{`
`1594`		`-#if defined(USE_BARRIER_SMGRRELEASE)`
`1595`		`-/* Close all smgr fds in all backends. */`
`1596`		`-WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));`
`1597`		`-#endif`
`1598`		`-`
`1599`	`1596`	`ResolveRecoveryConflictWithTablespace(xlrec->ts_id);`
`1600`	`1597`
`1601`	`1598`	`/*`

`‎src/include/pg_config_manual.h‎`

Lines changed: 0 additions & 11 deletions

Original file line number	Diff line number	Diff line change
`@@ -152,17 +152,6 @@`
`152`	`152`	`#define EXEC_BACKEND`
`153`	`153`	`#endif`
`154`	`154`
`155`		`-/*`
`156`		`- * If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink`
`157`		`- * directories will ask other backends to close all smgr file descriptors.`
`158`		`- * This is enabled on Windows, because otherwise unlinked but still open files`
`159`		`- * can prevent rmdir(containing_directory) from succeeding. On other`
`160`		`- * platforms, it can be defined to exercise those code paths.`
`161`		`- */`
`162`		`-#if defined(WIN32)`
`163`		`-#define USE_BARRIER_SMGRRELEASE`
`164`		`-#endif`
`165`		`-`
`166`	`155`	`/*`
`167`	`156`	`* Define this if your operating system supports link()`
`168`	`157`	`*/`

`‎src/test/recovery/Makefile‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@`
`9`	`9`	`#`
`10`	`10`	`#-------------------------------------------------------------------------`
`11`	`11`
`12`		`-EXTRA_INSTALL=contrib/test_decoding`
	`12`	`+EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm`
`13`	`13`
`14`	`14`	`subdir = src/test/recovery`
`15`	`15`	`top_builddir = ../../..`

`‎src/test/recovery/t/032_relfilenode_reuse.pl‎`

Lines changed: 233 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,233 @@`
	`1`	`+use strict;`
	`2`	`+use warnings;`
	`3`	`+use PostgreSQL::Test::Cluster;`
	`4`	`+use PostgreSQL::Test::Utils;`
	`5`	`+use Test::More;`
	`6`	`+use File::Basename;`
	`7`	`+`
	`8`	`+`
	`9`	`+my$node_primary = PostgreSQL::Test::Cluster->new('primary');`
	`10`	`+$node_primary->init(allows_streaming=> 1);`
	`11`	`+$node_primary->append_conf('postgresql.conf',q[`
	`12`	`+allow_in_place_tablespaces = true`
	`13`	`+log_connections=on`
	`14`	`+# to avoid "repairing" corruption`
	`15`	`+full_page_writes=off`
	`16`	`+log_min_messages=debug2`
	`17`	`+autovacuum_naptime=1s`
	`18`	`+shared_buffers=1MB`
	`19`	`+]);`
	`20`	`+$node_primary->start;`
	`21`	`+`
	`22`	`+`
	`23`	`+# Create streaming standby linking to primary`
	`24`	`+my$backup_name ='my_backup';`
	`25`	`+$node_primary->backup($backup_name);`
	`26`	`+my$node_standby = PostgreSQL::Test::Cluster->new('standby');`
	`27`	`+$node_standby->init_from_backup($node_primary,$backup_name,`
	`28`	`+has_streaming=> 1);`
	`29`	`+$node_standby->start;`
	`30`	`+`
	`31`	`+# To avoid hanging while expecting some specific input from a psql`
	`32`	`+# instance being driven by us, add a timeout high enough that it`
	`33`	`+# should never trigger even on very slow machines, unless something`
	`34`	`+# is really wrong.`
	`35`	`+my$psql_timeout = IPC::Run::timer(300);`
	`36`	`+`
	`37`	`+my%psql_primary = (stdin=>'',stdout=>'',stderr=>'');`
	`38`	`+$psql_primary{run} = IPC::Run::start(`
	`39`	`+['psql','-XA','-f','-','-d',$node_primary->connstr('postgres') ],`
	`40`	`+'<',`
	`41`	`+\$psql_primary{stdin},`
	`42`	`+'>',`
	`43`	`+\$psql_primary{stdout},`
	`44`	`+'2>',`
	`45`	`+\$psql_primary{stderr},`
	`46`	`+$psql_timeout);`
	`47`	`+`
	`48`	`+my%psql_standby = ('stdin'=>'','stdout'=>'','stderr'=>'');`
	`49`	`+$psql_standby{run} = IPC::Run::start(`
	`50`	`+['psql','-XA','-f','-','-d',$node_standby->connstr('postgres') ],`
	`51`	`+'<',`
	`52`	`+\$psql_standby{stdin},`
	`53`	`+'>',`
	`54`	`+\$psql_standby{stdout},`
	`55`	`+'2>',`
	`56`	`+\$psql_standby{stderr},`
	`57`	`+$psql_timeout);`
	`58`	`+`
	`59`	`+`
	`60`	`+# Create template database with a table that we'll update, to trigger dirty`
	`61`	`+# rows. Using a template database + preexisting rows makes it a bit easier to`
	`62`	`+# reproduce, because there's no cache invalidations generated.`
	`63`	`+`
	`64`	`+$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db_template OID = 50000;");`
	`65`	`+$node_primary->safe_psql('conflict_db_template',q[`
	`66`	`+ CREATE TABLE large(id serial primary key, dataa text, datab text);`
	`67`	`+ INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]);`
	`68`	`+$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");`
	`69`	`+`
	`70`	`+$node_primary->safe_psql('postgres',q[`
	`71`	`+ CREATE EXTENSION pg_prewarm;`
	`72`	`+ CREATE TABLE replace_sb(data text);`
	`73`	`+ INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]);`
	`74`	`+`
	`75`	`+# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files`
	`76`	`+send_query_and_wait(`
	`77`	`+\%psql_primary,`
	`78`	`+q[BEGIN;],`
	`79`	`+qr/BEGIN/m);`
	`80`	`+send_query_and_wait(`
	`81`	`+\%psql_standby,`
	`82`	`+q[BEGIN;],`
	`83`	`+qr/BEGIN/m);`
	`84`	`+`
	`85`	`+# Cause lots of dirty rows in shared_buffers`
	`86`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 1;");`
	`87`	`+`
	`88`	`+# Now do a bunch of work in another database. That will end up needing to`
	`89`	`+# write back dirty data from the previous step, opening the relevant file`
	`90`	`+# descriptors`
	`91`	`+cause_eviction(\%psql_primary, \%psql_standby);`
	`92`	`+`
	`93`	`+# drop and recreate database`
	`94`	`+$node_primary->safe_psql('postgres',"DROP DATABASE conflict_db;");`
	`95`	`+$node_primary->safe_psql('postgres',"CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");`
	`96`	`+`
	`97`	`+verify($node_primary,$node_standby, 1,`
	`98`	`+"initial contents as expected");`
	`99`	`+`
	`100`	`+# Again cause lots of dirty rows in shared_buffers, but use a different update`
	`101`	`+# value so we can check everything is OK`
	`102`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 2;");`
	`103`	`+`
	`104`	`+# Again cause a lot of IO. That'll again write back dirty data, but uses (XXX`
	`105`	`+# adjust after bugfix) the already opened file descriptor.`
	`106`	`+# FIXME`
	`107`	`+cause_eviction(\%psql_primary, \%psql_standby);`
	`108`	`+`
	`109`	`+verify($node_primary,$node_standby, 2,`
	`110`	`+"update to reused relfilenode (due to DB oid conflict) is not lost");`
	`111`	`+`
	`112`	`+`
	`113`	`+$node_primary->safe_psql('conflict_db',"VACUUM FULL large;");`
	`114`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 3;");`
	`115`	`+`
	`116`	`+verify($node_primary,$node_standby, 3,`
	`117`	`+"restored contents as expected");`
	`118`	`+`
	`119`	`+# Test for old filehandles after moving a database in / out of tablespace`
	`120`	`+$node_primary->safe_psql('postgres',q[CREATE TABLESPACE test_tablespace LOCATION '']);`
	`121`	`+`
	`122`	`+# cause dirty buffers`
	`123`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 4;");`
	`124`	`+# cause files to be opened in backend in other database`
	`125`	`+cause_eviction(\%psql_primary, \%psql_standby);`
	`126`	`+`
	`127`	`+# move database back / forth`
	`128`	`+$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');`
	`129`	`+$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE pg_default');`
	`130`	`+`
	`131`	`+# cause dirty buffers`
	`132`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 5;");`
	`133`	`+cause_eviction(\%psql_primary, \%psql_standby);`
	`134`	`+`
	`135`	`+verify($node_primary,$node_standby, 5,`
	`136`	`+"post move contents as expected");`
	`137`	`+`
	`138`	`+$node_primary->safe_psql('postgres','ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');`
	`139`	`+`
	`140`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 7;");`
	`141`	`+cause_eviction(\%psql_primary, \%psql_standby);`
	`142`	`+$node_primary->safe_psql('conflict_db',"UPDATE large SET datab = 8;");`
	`143`	`+$node_primary->safe_psql('postgres','DROP DATABASE conflict_db');`
	`144`	`+$node_primary->safe_psql('postgres','DROP TABLESPACE test_tablespace');`
	`145`	`+`
	`146`	`+$node_primary->safe_psql('postgres','REINDEX TABLE pg_database');`
	`147`	`+`
	`148`	`+`
	`149`	`+# explicitly shut down psql instances gracefully - to avoid hangs`
	`150`	`+# or worse on windows`
	`151`	`+$psql_primary{stdin} .="\\q\n";`
	`152`	`+$psql_primary{run}->finish;`
	`153`	`+$psql_standby{stdin} .="\\q\n";`
	`154`	`+$psql_standby{run}->finish;`
	`155`	`+`
	`156`	`+$node_primary->stop();`
	`157`	`+$node_standby->stop();`
	`158`	`+`
	`159`	`+# Make sure that there weren't crashes during shutdown`
	`160`	`+`
	`161`	`+command_like(['pg_controldata',$node_primary->data_dir ],`
	`162`	`+qr/Database cluster state:\s+shut down\n/,'primary shut down ok');`
	`163`	`+command_like(['pg_controldata',$node_standby->data_dir ],`
	`164`	`+qr/Database cluster state:\s+shut down in recovery\n/,'standby shut down ok');`
	`165`	`+done_testing();`
	`166`	`+`
	`167`	`+sub verify`
	`168`	`+{`
	`169`	`+my ($primary,$standby,$counter,$message) =@_;`
	`170`	`+`
	`171`	`+my$query ="SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10";`
	`172`	`+is($primary->safe_psql('conflict_db',$query),`
	`173`	`+"$counter\|4000",`
	`174`	`+"primary:$message");`
	`175`	`+`
	`176`	`+$primary->wait_for_catchup($standby);`
	`177`	`+is($standby->safe_psql('conflict_db',$query),`
	`178`	`+"$counter\|4000",`
	`179`	`+"standby:$message");`
	`180`	`+}`
	`181`	`+`
	`182`	`+sub cause_eviction`
	`183`	`+{`
	`184`	`+my ($psql_primary,$psql_standby) =@_;`
	`185`	`+`
	`186`	`+send_query_and_wait(`
	`187`	`+$psql_primary,`
	`188`	`+q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],`
	`189`	`+qr/warmed_buffers/m);`
	`190`	`+`
	`191`	`+send_query_and_wait(`
	`192`	`+$psql_standby,`
	`193`	`+q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],`
	`194`	`+qr/warmed_buffers/m);`
	`195`	`+}`
	`196`	`+`
	`197`	`+# Send query, wait until string matches`
	`198`	`+sub send_query_and_wait`
	`199`	`+{`
	`200`	`+my ($psql,$query,$untl) =@_;`
	`201`	`+my$ret;`
	`202`	`+`
	`203`	`+# send query`
	`204`	`+$$psql{stdin} .=$query;`
	`205`	`+$$psql{stdin} .="\n";`
	`206`	`+`
	`207`	`+# wait for query results`
	`208`	`+$$psql{run}->pump_nb();`
	`209`	`+while (1)`
	`210`	`+{`
	`211`	`+last if $$psql{stdout} =~ /$untl/;`
	`212`	`+`
	`213`	`+if ($psql_timeout->is_expired)`
	`214`	`+{`
	`215`	`+BAIL_OUT("aborting wait: program timed out\n"`
	`216`	`+ ."stream contents: >>$$psql{stdout}<<\n"`
	`217`	`+ ."pattern searched for:$untl\n");`
	`218`	`+return 0;`
	`219`	`+}`
	`220`	`+if (not$$psql{run}->pumpable())`
	`221`	`+{`
	`222`	`+BAIL_OUT("aborting wait: program died\n"`
	`223`	`+ ."stream contents: >>$$psql{stdout}<<\n"`
	`224`	`+ ."pattern searched for:$untl\n");`
	`225`	`+return 0;`
	`226`	`+}`
	`227`	`+$$psql{run}->pump();`
	`228`	`+}`
	`229`	`+`
	`230`	`+$$psql{stdout} ='';`
	`231`	`+`
	`232`	`+return 1;`
	`233`	`+}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commite2f65f4

File tree

5 files changed

5 files changed

`‎src/backend/commands/dbcommands.c‎`

`‎src/backend/commands/tablespace.c‎`

`‎src/include/pg_config_manual.h‎`

`‎src/test/recovery/Makefile‎`

`‎src/test/recovery/t/032_relfilenode_reuse.pl‎`

0 commit comments