Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0869e53

Browse files
committed
Avoid race in RelationBuildDesc() affecting CREATE INDEX CONCURRENTLY.
CIC and REINDEX CONCURRENTLY assume backends see their catalog changesno later than each backend's next transaction start. That failed tohold when a backend absorbed a relevant invalidation in the middle ofrunning RelationBuildDesc() on the CIC index. Queries that use theresulting index can silently fail to find rows. Fix this for futureindex builds by making RelationBuildDesc() loop until it finisheswithout accepting a relevant invalidation. It may be necessary toreindex to recover from past occurrences; REINDEX CONCURRENTLY suffices.Back-patch to 9.6 (all supported versions).Noah Misch and Andrey Borodin, reviewed (in earlier versions) by AndresFreund.Discussion:https://postgr.es/m/20210730022548.GA1940096@gust.leadboat.com
1 parentfb1aa48 commit0869e53

File tree

8 files changed

+331
-5
lines changed

8 files changed

+331
-5
lines changed

‎contrib/amcheck/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ PGFILEDESC = "amcheck - function for verifying relation integrity"
99

1010
REGRESS = check check_btree
1111

12+
TAP_TESTS = 1
13+
1214
ifdefUSE_PGXS
1315
PG_CONFIG = pg_config
1416
PGXS :=$(shell$(PG_CONFIG) --pgxs)

‎contrib/amcheck/t/002_cic.pl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::Moretests=> 4;
13+
14+
my ($node,$result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = get_new_node('CIC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf','lock_timeout = 180000');
22+
$node->start;
23+
$node->safe_psql('postgres',q(CREATE EXTENSION amcheck));
24+
$node->safe_psql('postgres',q(CREATE TABLE tbl(i int)));
25+
$node->safe_psql('postgres',q(CREATE INDEX idx ON tbl(i)));
26+
27+
#
28+
# Stress CIC with pgbench
29+
#
30+
31+
# Run background pgbench with CIC. We cannot mix-in this script into single
32+
# pgbench: CIC will deadlock with itself occasionally.
33+
my$pgbench_out ='';
34+
my$pgbench_timer = IPC::Run::timeout(180);
35+
my$pgbench_h =$node->background_pgbench(
36+
'--no-vacuum --client=1 --transactions=200',
37+
{
38+
'002_pgbench_concurrent_cic'=>q(
39+
DROP INDEX CONCURRENTLY idx;
40+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
41+
SELECT bt_index_check('idx',true);
42+
)
43+
},
44+
\$pgbench_out,
45+
$pgbench_timer);
46+
47+
# Run pgbench.
48+
$node->pgbench(
49+
'--no-vacuum --client=5 --transactions=200',
50+
0,
51+
[qr{actually processed}],
52+
[qr{^$}],
53+
'concurrent INSERTs',
54+
{
55+
'002_pgbench_concurrent_transaction'=>q(
56+
BEGIN;
57+
INSERT INTO tbl VALUES(0);
58+
COMMIT;
59+
),
60+
'002_pgbench_concurrent_transaction_savepoints'=>q(
61+
BEGIN;
62+
SAVEPOINT s1;
63+
INSERT INTO tbl VALUES(0);
64+
COMMIT;
65+
)
66+
});
67+
68+
$pgbench_h->pump_nb;
69+
$pgbench_h->finish();
70+
$result =
71+
($Config{osname}eq"MSWin32")
72+
? ($pgbench_h->full_results)[0]
73+
:$pgbench_h->result(0);
74+
is($result, 0,"pgbench with CIC works");
75+
76+
# done
77+
$node->stop;
78+
done_testing();

‎src/backend/utils/cache/inval.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
584584
inti;
585585

586586
if (msg->rc.relId==InvalidOid)
587-
RelationCacheInvalidate();
587+
RelationCacheInvalidate(false);
588588
else
589589
RelationCacheInvalidateEntry(msg->rc.relId);
590590

@@ -641,12 +641,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
641641
*/
642642
void
643643
InvalidateSystemCaches(void)
644+
{
645+
InvalidateSystemCachesExtended(false);
646+
}
647+
648+
void
649+
InvalidateSystemCachesExtended(booldebug_discard)
644650
{
645651
inti;
646652

647653
InvalidateCatalogSnapshot();
648654
ResetCatalogCaches();
649-
RelationCacheInvalidate();/* gets smgr and relmap too */
655+
RelationCacheInvalidate(debug_discard);/* gets smgr and relmap too */
650656

651657
for (i=0;i<syscache_callback_count;i++)
652658
{
@@ -717,7 +723,7 @@ AcceptInvalidationMessages(void)
717723
if (recursion_depth<3)
718724
{
719725
recursion_depth++;
720-
InvalidateSystemCaches();
726+
InvalidateSystemCachesExtended(true);
721727
recursion_depth--;
722728
}
723729
}

‎src/backend/utils/cache/relcache.c

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,24 @@ boolcriticalSharedRelcachesBuilt = false;
154154
*/
155155
staticlongrelcacheInvalsReceived=0L;
156156

157+
/*
158+
* in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
159+
* INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
160+
* It critically relies on each backend absorbing those changes no later than
161+
* next transaction start. Hence, RelationBuildDesc() loops until it finishes
162+
* without accepting a relevant invalidation. (Most invalidation consumers
163+
* don't do this.)
164+
*/
165+
typedefstructinprogressent
166+
{
167+
Oidreloid;/* OID of relation being built */
168+
boolinvalidated;/* whether an invalidation arrived for it */
169+
}InProgressEnt;
170+
171+
staticInProgressEnt*in_progress_list;
172+
staticintin_progress_list_len;
173+
staticintin_progress_list_maxlen;
174+
157175
/*
158176
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
159177
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -1042,6 +1060,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
10421060
staticRelation
10431061
RelationBuildDesc(OidtargetRelId,boolinsertIt)
10441062
{
1063+
intin_progress_offset;
10451064
Relationrelation;
10461065
Oidrelid;
10471066
HeapTuplepg_class_tuple;
@@ -1069,6 +1088,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10691088
oldcxt=MemoryContextSwitchTo(tmpcxt);
10701089
#endif
10711090

1091+
/* Register to catch invalidation messages */
1092+
if (in_progress_list_len >=in_progress_list_maxlen)
1093+
{
1094+
intallocsize;
1095+
1096+
allocsize=in_progress_list_maxlen*2;
1097+
in_progress_list=repalloc(in_progress_list,
1098+
allocsize*sizeof(*in_progress_list));
1099+
in_progress_list_maxlen=allocsize;
1100+
}
1101+
in_progress_offset=in_progress_list_len++;
1102+
in_progress_list[in_progress_offset].reloid=targetRelId;
1103+
retry:
1104+
in_progress_list[in_progress_offset].invalidated= false;
1105+
10721106
/*
10731107
* find the tuple in pg_class corresponding to the given relation id
10741108
*/
@@ -1084,6 +1118,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10841118
MemoryContextSwitchTo(oldcxt);
10851119
MemoryContextDelete(tmpcxt);
10861120
#endif
1121+
Assert(in_progress_offset+1==in_progress_list_len);
1122+
in_progress_list_len--;
10871123
returnNULL;
10881124
}
10891125

@@ -1251,6 +1287,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
12511287
*/
12521288
heap_freetuple(pg_class_tuple);
12531289

1290+
/*
1291+
* If an invalidation arrived mid-build, start over. Between here and the
1292+
* end of this function, don't add code that does or reasonably could read
1293+
* system catalogs. That range must be free from invalidation processing
1294+
* for the !insertIt case. For the insertIt case, RelationCacheInsert()
1295+
* will enroll this relation in ordinary relcache invalidation processing,
1296+
*/
1297+
if (in_progress_list[in_progress_offset].invalidated)
1298+
{
1299+
RelationDestroyRelation(relation, false);
1300+
gotoretry;
1301+
}
1302+
Assert(in_progress_offset+1==in_progress_list_len);
1303+
in_progress_list_len--;
1304+
12541305
/*
12551306
* Insert newly created relation into relcache hash table, if requested.
12561307
*
@@ -2554,6 +2605,14 @@ RelationClearRelation(Relation relation, bool rebuild)
25542605

25552606
/* Build temporary entry, but don't link it into hashtable */
25562607
newrel=RelationBuildDesc(save_relid, false);
2608+
2609+
/*
2610+
* Between here and the end of the swap, don't add code that does or
2611+
* reasonably could read system catalogs. That range must be free
2612+
* from invalidation processing. See RelationBuildDesc() manipulation
2613+
* of in_progress_list.
2614+
*/
2615+
25572616
if (newrel==NULL)
25582617
{
25592618
/*
@@ -2765,6 +2824,14 @@ RelationCacheInvalidateEntry(Oid relationId)
27652824
relcacheInvalsReceived++;
27662825
RelationFlushRelation(relation);
27672826
}
2827+
else
2828+
{
2829+
inti;
2830+
2831+
for (i=0;i<in_progress_list_len;i++)
2832+
if (in_progress_list[i].reloid==relationId)
2833+
in_progress_list[i].invalidated= true;
2834+
}
27682835
}
27692836

27702837
/*
@@ -2796,16 +2863,22 @@ RelationCacheInvalidateEntry(Oid relationId)
27962863
* second pass processes nailed-in-cache items before other nondeletable
27972864
* items. This should ensure that system catalogs are up to date before
27982865
* we attempt to use them to reload information about other open relations.
2866+
*
2867+
* After those two phases of work having immediate effects, we normally
2868+
* signal any RelationBuildDesc() on the stack to start over. However, we
2869+
* don't do this if called as part of debug_discard_caches. Otherwise,
2870+
* RelationBuildDesc() would become an infinite loop.
27992871
*/
28002872
void
2801-
RelationCacheInvalidate(void)
2873+
RelationCacheInvalidate(booldebug_discard)
28022874
{
28032875
HASH_SEQ_STATUSstatus;
28042876
RelIdCacheEnt*idhentry;
28052877
Relationrelation;
28062878
List*rebuildFirstList=NIL;
28072879
List*rebuildList=NIL;
28082880
ListCell*l;
2881+
inti;
28092882

28102883
/*
28112884
* Reload relation mapping data before starting to reconstruct cache.
@@ -2892,6 +2965,11 @@ RelationCacheInvalidate(void)
28922965
RelationClearRelation(relation, true);
28932966
}
28942967
list_free(rebuildList);
2968+
2969+
if (!debug_discard)
2970+
/* Any RelationBuildDesc() on the stack must start over. */
2971+
for (i=0;i<in_progress_list_len;i++)
2972+
in_progress_list[i].invalidated= true;
28952973
}
28962974

28972975
/*
@@ -2964,6 +3042,13 @@ AtEOXact_RelationCache(bool isCommit)
29643042
RelIdCacheEnt*idhentry;
29653043
inti;
29663044

3045+
/*
3046+
* Forget in_progress_list. This is relevant when we're aborting due to
3047+
* an error during RelationBuildDesc().
3048+
*/
3049+
Assert(in_progress_list_len==0|| !isCommit);
3050+
in_progress_list_len=0;
3051+
29673052
/*
29683053
* Unless the eoxact_list[] overflowed, we only need to examine the rels
29693054
* listed in it. Otherwise fall back on a hash_seq_search scan.
@@ -3100,6 +3185,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
31003185
RelIdCacheEnt*idhentry;
31013186
inti;
31023187

3188+
/*
3189+
* Forget in_progress_list. This is relevant when we're aborting due to
3190+
* an error during RelationBuildDesc(). We don't commit subtransactions
3191+
* during RelationBuildDesc().
3192+
*/
3193+
Assert(in_progress_list_len==0|| !isCommit);
3194+
in_progress_list_len=0;
3195+
31033196
/*
31043197
* Unless the eoxact_list[] overflowed, we only need to examine the rels
31053198
* listed in it. Otherwise fall back on a hash_seq_search scan. Same
@@ -3601,6 +3694,7 @@ void
36013694
RelationCacheInitialize(void)
36023695
{
36033696
HASHCTLctl;
3697+
intallocsize;
36043698

36053699
/*
36063700
* make sure cache memory context exists
@@ -3617,6 +3711,15 @@ RelationCacheInitialize(void)
36173711
RelationIdCache=hash_create("Relcache by OID",INITRELCACHESIZE,
36183712
&ctl,HASH_ELEM |HASH_BLOBS);
36193713

3714+
/*
3715+
* reserve enough in_progress_list slots for many cases
3716+
*/
3717+
allocsize=4;
3718+
in_progress_list=
3719+
MemoryContextAlloc(CacheMemoryContext,
3720+
allocsize*sizeof(*in_progress_list));
3721+
in_progress_list_maxlen=allocsize;
3722+
36203723
/*
36213724
* relation mapper needs to be initialized too
36223725
*/

‎src/include/utils/inval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
6161
externvoidCallSyscacheCallbacks(intcacheid,uint32hashvalue);
6262

6363
externvoidInvalidateSystemCaches(void);
64+
externvoidInvalidateSystemCachesExtended(booldebug_discard);
6465
#endif/* INVAL_H */

‎src/include/utils/relcache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ extern void RelationForgetRelation(Oid rid);
117117

118118
externvoidRelationCacheInvalidateEntry(OidrelationId);
119119

120-
externvoidRelationCacheInvalidate(void);
120+
externvoidRelationCacheInvalidate(booldebug_discard);
121121

122122
externvoidRelationCloseSmgrByOid(OidrelationId);
123123

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp