Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitfdd965d

Browse files
committed
Avoid race in RelationBuildDesc() affecting CREATE INDEX CONCURRENTLY.
CIC and REINDEX CONCURRENTLY assume backends see their catalog changesno later than each backend's next transaction start. That failed tohold when a backend absorbed a relevant invalidation in the middle ofrunning RelationBuildDesc() on the CIC index. Queries that use theresulting index can silently fail to find rows. Fix this for futureindex builds by making RelationBuildDesc() loop until it finisheswithout accepting a relevant invalidation. It may be necessary toreindex to recover from past occurrences; REINDEX CONCURRENTLY suffices.Back-patch to 9.6 (all supported versions).Noah Misch and Andrey Borodin, reviewed (in earlier versions) by AndresFreund.Discussion:https://postgr.es/m/20210730022548.GA1940096@gust.leadboat.com
1 parent1e94756 commitfdd965d

File tree

8 files changed

+368
-93
lines changed

8 files changed

+368
-93
lines changed

‎contrib/amcheck/t/002_cic.pl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::Moretests=> 4;
13+
14+
my ($node,$result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = PostgresNode->new('CIC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf','lock_timeout = 180000');
22+
$node->start;
23+
$node->safe_psql('postgres',q(CREATE EXTENSION amcheck));
24+
$node->safe_psql('postgres',q(CREATE TABLE tbl(i int)));
25+
$node->safe_psql('postgres',q(CREATE INDEX idx ON tbl(i)));
26+
27+
#
28+
# Stress CIC with pgbench
29+
#
30+
31+
# Run background pgbench with CIC. We cannot mix-in this script into single
32+
# pgbench: CIC will deadlock with itself occasionally.
33+
my$pgbench_out ='';
34+
my$pgbench_timer = IPC::Run::timeout(180);
35+
my$pgbench_h =$node->background_pgbench(
36+
'--no-vacuum --client=1 --transactions=200',
37+
{
38+
'002_pgbench_concurrent_cic'=>q(
39+
DROP INDEX CONCURRENTLY idx;
40+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
41+
SELECT bt_index_check('idx',true);
42+
)
43+
},
44+
\$pgbench_out,
45+
$pgbench_timer);
46+
47+
# Run pgbench.
48+
$node->pgbench(
49+
'--no-vacuum --client=5 --transactions=200',
50+
0,
51+
[qr{actually processed}],
52+
[qr{^$}],
53+
'concurrent INSERTs',
54+
{
55+
'002_pgbench_concurrent_transaction'=>q(
56+
BEGIN;
57+
INSERT INTO tbl VALUES(0);
58+
COMMIT;
59+
),
60+
'002_pgbench_concurrent_transaction_savepoints'=>q(
61+
BEGIN;
62+
SAVEPOINT s1;
63+
INSERT INTO tbl VALUES(0);
64+
COMMIT;
65+
)
66+
});
67+
68+
$pgbench_h->pump_nb;
69+
$pgbench_h->finish();
70+
$result =
71+
($Config{osname}eq"MSWin32")
72+
? ($pgbench_h->full_results)[0]
73+
:$pgbench_h->result(0);
74+
is($result, 0,"pgbench with CIC works");
75+
76+
# done
77+
$node->stop;
78+
done_testing();

‎src/backend/utils/cache/inval.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
628628
inti;
629629

630630
if (msg->rc.relId==InvalidOid)
631-
RelationCacheInvalidate();
631+
RelationCacheInvalidate(false);
632632
else
633633
RelationCacheInvalidateEntry(msg->rc.relId);
634634

@@ -685,12 +685,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
685685
*/
686686
void
687687
InvalidateSystemCaches(void)
688+
{
689+
InvalidateSystemCachesExtended(false);
690+
}
691+
692+
void
693+
InvalidateSystemCachesExtended(booldebug_discard)
688694
{
689695
inti;
690696

691697
InvalidateCatalogSnapshot();
692698
ResetCatalogCaches();
693-
RelationCacheInvalidate();/* gets smgr and relmap too */
699+
RelationCacheInvalidate(debug_discard);/* gets smgr and relmap too */
694700

695701
for (i=0;i<syscache_callback_count;i++)
696702
{
@@ -759,7 +765,7 @@ AcceptInvalidationMessages(void)
759765
if (recursion_depth<debug_discard_caches)
760766
{
761767
recursion_depth++;
762-
InvalidateSystemCaches();
768+
InvalidateSystemCachesExtended(true);
763769
recursion_depth--;
764770
}
765771
}

‎src/backend/utils/cache/relcache.c

Lines changed: 109 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,24 @@ boolcriticalSharedRelcachesBuilt = false;
150150
*/
151151
staticlongrelcacheInvalsReceived=0L;
152152

153+
/*
154+
* in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
155+
* INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
156+
* It critically relies on each backend absorbing those changes no later than
157+
* next transaction start. Hence, RelationBuildDesc() loops until it finishes
158+
* without accepting a relevant invalidation. (Most invalidation consumers
159+
* don't do this.)
160+
*/
161+
typedefstructinprogressent
162+
{
163+
Oidreloid;/* OID of relation being built */
164+
boolinvalidated;/* whether an invalidation arrived for it */
165+
}InProgressEnt;
166+
167+
staticInProgressEnt*in_progress_list;
168+
staticintin_progress_list_len;
169+
staticintin_progress_list_maxlen;
170+
153171
/*
154172
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
155173
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -1000,6 +1018,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
10001018
staticRelation
10011019
RelationBuildDesc(OidtargetRelId,boolinsertIt)
10021020
{
1021+
intin_progress_offset;
10031022
Relationrelation;
10041023
Oidrelid;
10051024
HeapTuplepg_class_tuple;
@@ -1033,6 +1052,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10331052
}
10341053
#endif
10351054

1055+
/* Register to catch invalidation messages */
1056+
if (in_progress_list_len >=in_progress_list_maxlen)
1057+
{
1058+
intallocsize;
1059+
1060+
allocsize=in_progress_list_maxlen*2;
1061+
in_progress_list=repalloc(in_progress_list,
1062+
allocsize*sizeof(*in_progress_list));
1063+
in_progress_list_maxlen=allocsize;
1064+
}
1065+
in_progress_offset=in_progress_list_len++;
1066+
in_progress_list[in_progress_offset].reloid=targetRelId;
1067+
retry:
1068+
in_progress_list[in_progress_offset].invalidated= false;
1069+
10361070
/*
10371071
* find the tuple in pg_class corresponding to the given relation id
10381072
*/
@@ -1051,6 +1085,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10511085
MemoryContextDelete(tmpcxt);
10521086
}
10531087
#endif
1088+
Assert(in_progress_offset+1==in_progress_list_len);
1089+
in_progress_list_len--;
10541090
returnNULL;
10551091
}
10561092

@@ -1213,6 +1249,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
12131249
*/
12141250
heap_freetuple(pg_class_tuple);
12151251

1252+
/*
1253+
* If an invalidation arrived mid-build, start over. Between here and the
1254+
* end of this function, don't add code that does or reasonably could read
1255+
* system catalogs. That range must be free from invalidation processing
1256+
* for the !insertIt case. For the insertIt case, RelationCacheInsert()
1257+
* will enroll this relation in ordinary relcache invalidation processing,
1258+
*/
1259+
if (in_progress_list[in_progress_offset].invalidated)
1260+
{
1261+
RelationDestroyRelation(relation, false);
1262+
gotoretry;
1263+
}
1264+
Assert(in_progress_offset+1==in_progress_list_len);
1265+
in_progress_list_len--;
1266+
12161267
/*
12171268
* Insert newly created relation into relcache hash table, if requested.
12181269
*
@@ -2566,6 +2617,14 @@ RelationClearRelation(Relation relation, bool rebuild)
25662617

25672618
/* Build temporary entry, but don't link it into hashtable */
25682619
newrel=RelationBuildDesc(save_relid, false);
2620+
2621+
/*
2622+
* Between here and the end of the swap, don't add code that does or
2623+
* reasonably could read system catalogs. That range must be free
2624+
* from invalidation processing. See RelationBuildDesc() manipulation
2625+
* of in_progress_list.
2626+
*/
2627+
25692628
if (newrel==NULL)
25702629
{
25712630
/*
@@ -2805,6 +2864,14 @@ RelationCacheInvalidateEntry(Oid relationId)
28052864
relcacheInvalsReceived++;
28062865
RelationFlushRelation(relation);
28072866
}
2867+
else
2868+
{
2869+
inti;
2870+
2871+
for (i=0;i<in_progress_list_len;i++)
2872+
if (in_progress_list[i].reloid==relationId)
2873+
in_progress_list[i].invalidated= true;
2874+
}
28082875
}
28092876

28102877
/*
@@ -2813,11 +2880,11 @@ RelationCacheInvalidateEntry(Oid relationId)
28132880
* and rebuild those with positive reference counts. Also reset the smgr
28142881
* relation cache and re-read relation mapping data.
28152882
*
2816-
*Thisis currently used only to recover from SI message buffer overflow,
2817-
* so we do not touch relations having new-in-transaction relfilenodes; they
2818-
* cannot be targets of cross-backend SI updates (and our own updates now go
2819-
*through a separate linked list that isn't limited by the SI message
2820-
* buffer size).
2883+
*Apart from debug_discard_caches, thisis currently used only to recover
2884+
*from SI message buffer overflow,so we do not touch relations having
2885+
*new-in-transaction relfilenodes; theycannot be targets of cross-backend
2886+
*SI updates (and our own updates now go through a separate linked list
2887+
*that isn't limited by the SI messagebuffer size).
28212888
*
28222889
* We do this in two phases: the first pass deletes deletable items, and
28232890
* the second one rebuilds the rebuildable items. This is essential for
@@ -2835,16 +2902,22 @@ RelationCacheInvalidateEntry(Oid relationId)
28352902
* second pass processes nailed-in-cache items before other nondeletable
28362903
* items. This should ensure that system catalogs are up to date before
28372904
* we attempt to use them to reload information about other open relations.
2905+
*
2906+
* After those two phases of work having immediate effects, we normally
2907+
* signal any RelationBuildDesc() on the stack to start over. However, we
2908+
* don't do this if called as part of debug_discard_caches. Otherwise,
2909+
* RelationBuildDesc() would become an infinite loop.
28382910
*/
28392911
void
2840-
RelationCacheInvalidate(void)
2912+
RelationCacheInvalidate(booldebug_discard)
28412913
{
28422914
HASH_SEQ_STATUSstatus;
28432915
RelIdCacheEnt*idhentry;
28442916
Relationrelation;
28452917
List*rebuildFirstList=NIL;
28462918
List*rebuildList=NIL;
28472919
ListCell*l;
2920+
inti;
28482921

28492922
/*
28502923
* Reload relation mapping data before starting to reconstruct cache.
@@ -2931,6 +3004,11 @@ RelationCacheInvalidate(void)
29313004
RelationClearRelation(relation, true);
29323005
}
29333006
list_free(rebuildList);
3007+
3008+
if (!debug_discard)
3009+
/* Any RelationBuildDesc() on the stack must start over. */
3010+
for (i=0;i<in_progress_list_len;i++)
3011+
in_progress_list[i].invalidated= true;
29343012
}
29353013

29363014
/*
@@ -3081,6 +3159,13 @@ AtEOXact_RelationCache(bool isCommit)
30813159
RelIdCacheEnt*idhentry;
30823160
inti;
30833161

3162+
/*
3163+
* Forget in_progress_list. This is relevant when we're aborting due to
3164+
* an error during RelationBuildDesc().
3165+
*/
3166+
Assert(in_progress_list_len==0|| !isCommit);
3167+
in_progress_list_len=0;
3168+
30843169
/*
30853170
* Unless the eoxact_list[] overflowed, we only need to examine the rels
30863171
* listed in it. Otherwise fall back on a hash_seq_search scan.
@@ -3227,6 +3312,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
32273312
RelIdCacheEnt*idhentry;
32283313
inti;
32293314

3315+
/*
3316+
* Forget in_progress_list. This is relevant when we're aborting due to
3317+
* an error during RelationBuildDesc(). We don't commit subtransactions
3318+
* during RelationBuildDesc().
3319+
*/
3320+
Assert(in_progress_list_len==0|| !isCommit);
3321+
in_progress_list_len=0;
3322+
32303323
/*
32313324
* Unless the eoxact_list[] overflowed, we only need to examine the rels
32323325
* listed in it. Otherwise fall back on a hash_seq_search scan. Same
@@ -3775,6 +3868,7 @@ void
37753868
RelationCacheInitialize(void)
37763869
{
37773870
HASHCTLctl;
3871+
intallocsize;
37783872

37793873
/*
37803874
* make sure cache memory context exists
@@ -3790,6 +3884,15 @@ RelationCacheInitialize(void)
37903884
RelationIdCache=hash_create("Relcache by OID",INITRELCACHESIZE,
37913885
&ctl,HASH_ELEM |HASH_BLOBS);
37923886

3887+
/*
3888+
* reserve enough in_progress_list slots for many cases
3889+
*/
3890+
allocsize=4;
3891+
in_progress_list=
3892+
MemoryContextAlloc(CacheMemoryContext,
3893+
allocsize*sizeof(*in_progress_list));
3894+
in_progress_list_maxlen=allocsize;
3895+
37933896
/*
37943897
* relation mapper needs to be initialized too
37953898
*/

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp