Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2e33b43

Browse files
committed
Avoid race in RelationBuildDesc() affecting CREATE INDEX CONCURRENTLY.
CIC and REINDEX CONCURRENTLY assume backends see their catalog changesno later than each backend's next transaction start. That failed tohold when a backend absorbed a relevant invalidation in the middle ofrunning RelationBuildDesc() on the CIC index. Queries that use theresulting index can silently fail to find rows. Fix this for futureindex builds by making RelationBuildDesc() loop until it finisheswithout accepting a relevant invalidation. It may be necessary toreindex to recover from past occurrences; REINDEX CONCURRENTLY suffices.Back-patch to 9.6 (all supported versions).Noah Misch and Andrey Borodin, reviewed (in earlier versions) by AndresFreund.Discussion:https://postgr.es/m/20210730022548.GA1940096@gust.leadboat.com
1 parent7c949f1 commit2e33b43

File tree

8 files changed

+336
-10
lines changed

8 files changed

+336
-10
lines changed

‎contrib/amcheck/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ PGFILEDESC = "amcheck - function for verifying relation integrity"
1111

1212
REGRESS = check check_btree
1313

14+
TAP_TESTS = 1
15+
1416
ifdefUSE_PGXS
1517
PG_CONFIG = pg_config
1618
PGXS :=$(shell$(PG_CONFIG) --pgxs)

‎contrib/amcheck/t/002_cic.pl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::Moretests=> 4;
13+
14+
my ($node,$result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = get_new_node('CIC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf','lock_timeout = 180000');
22+
$node->start;
23+
$node->safe_psql('postgres',q(CREATE EXTENSION amcheck));
24+
$node->safe_psql('postgres',q(CREATE TABLE tbl(i int)));
25+
$node->safe_psql('postgres',q(CREATE INDEX idx ON tbl(i)));
26+
27+
#
28+
# Stress CIC with pgbench
29+
#
30+
31+
# Run background pgbench with CIC. We cannot mix-in this script into single
32+
# pgbench: CIC will deadlock with itself occasionally.
33+
my$pgbench_out ='';
34+
my$pgbench_timer = IPC::Run::timeout(180);
35+
my$pgbench_h =$node->background_pgbench(
36+
'--no-vacuum --client=1 --transactions=200',
37+
{
38+
'002_pgbench_concurrent_cic'=>q(
39+
DROP INDEX CONCURRENTLY idx;
40+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
41+
SELECT bt_index_check('idx',true);
42+
)
43+
},
44+
\$pgbench_out,
45+
$pgbench_timer);
46+
47+
# Run pgbench.
48+
$node->pgbench(
49+
'--no-vacuum --client=5 --transactions=200',
50+
0,
51+
[qr{actually processed}],
52+
[qr{^$}],
53+
'concurrent INSERTs',
54+
{
55+
'002_pgbench_concurrent_transaction'=>q(
56+
BEGIN;
57+
INSERT INTO tbl VALUES(0);
58+
COMMIT;
59+
),
60+
'002_pgbench_concurrent_transaction_savepoints'=>q(
61+
BEGIN;
62+
SAVEPOINT s1;
63+
INSERT INTO tbl VALUES(0);
64+
COMMIT;
65+
)
66+
});
67+
68+
$pgbench_h->pump_nb;
69+
$pgbench_h->finish();
70+
$result =
71+
($Config{osname}eq"MSWin32")
72+
? ($pgbench_h->full_results)[0]
73+
:$pgbench_h->result(0);
74+
is($result, 0,"pgbench with CIC works");
75+
76+
# done
77+
$node->stop;
78+
done_testing();

‎src/backend/utils/cache/inval.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
584584
inti;
585585

586586
if (msg->rc.relId==InvalidOid)
587-
RelationCacheInvalidate();
587+
RelationCacheInvalidate(false);
588588
else
589589
RelationCacheInvalidateEntry(msg->rc.relId);
590590

@@ -641,12 +641,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
641641
*/
642642
void
643643
InvalidateSystemCaches(void)
644+
{
645+
InvalidateSystemCachesExtended(false);
646+
}
647+
648+
void
649+
InvalidateSystemCachesExtended(booldebug_discard)
644650
{
645651
inti;
646652

647653
InvalidateCatalogSnapshot();
648654
ResetCatalogCaches();
649-
RelationCacheInvalidate();/* gets smgr and relmap too */
655+
RelationCacheInvalidate(debug_discard);/* gets smgr and relmap too */
650656

651657
for (i=0;i<syscache_callback_count;i++)
652658
{
@@ -717,7 +723,7 @@ AcceptInvalidationMessages(void)
717723
if (recursion_depth<3)
718724
{
719725
recursion_depth++;
720-
InvalidateSystemCaches();
726+
InvalidateSystemCachesExtended(true);
721727
recursion_depth--;
722728
}
723729
}

‎src/backend/utils/cache/relcache.c

Lines changed: 109 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,24 @@ boolcriticalSharedRelcachesBuilt = false;
150150
*/
151151
staticlongrelcacheInvalsReceived=0L;
152152

153+
/*
154+
* in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
155+
* INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
156+
* It critically relies on each backend absorbing those changes no later than
157+
* next transaction start. Hence, RelationBuildDesc() loops until it finishes
158+
* without accepting a relevant invalidation. (Most invalidation consumers
159+
* don't do this.)
160+
*/
161+
typedefstructinprogressent
162+
{
163+
Oidreloid;/* OID of relation being built */
164+
boolinvalidated;/* whether an invalidation arrived for it */
165+
}InProgressEnt;
166+
167+
staticInProgressEnt*in_progress_list;
168+
staticintin_progress_list_len;
169+
staticintin_progress_list_maxlen;
170+
153171
/*
154172
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
155173
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -1043,6 +1061,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
10431061
staticRelation
10441062
RelationBuildDesc(OidtargetRelId,boolinsertIt)
10451063
{
1064+
intin_progress_offset;
10461065
Relationrelation;
10471066
Oidrelid;
10481067
HeapTuplepg_class_tuple;
@@ -1070,6 +1089,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10701089
oldcxt=MemoryContextSwitchTo(tmpcxt);
10711090
#endif
10721091

1092+
/* Register to catch invalidation messages */
1093+
if (in_progress_list_len >=in_progress_list_maxlen)
1094+
{
1095+
intallocsize;
1096+
1097+
allocsize=in_progress_list_maxlen*2;
1098+
in_progress_list=repalloc(in_progress_list,
1099+
allocsize*sizeof(*in_progress_list));
1100+
in_progress_list_maxlen=allocsize;
1101+
}
1102+
in_progress_offset=in_progress_list_len++;
1103+
in_progress_list[in_progress_offset].reloid=targetRelId;
1104+
retry:
1105+
in_progress_list[in_progress_offset].invalidated= false;
1106+
10731107
/*
10741108
* find the tuple in pg_class corresponding to the given relation id
10751109
*/
@@ -1085,6 +1119,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10851119
MemoryContextSwitchTo(oldcxt);
10861120
MemoryContextDelete(tmpcxt);
10871121
#endif
1122+
Assert(in_progress_offset+1==in_progress_list_len);
1123+
in_progress_list_len--;
10881124
returnNULL;
10891125
}
10901126

@@ -1244,6 +1280,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
12441280
*/
12451281
heap_freetuple(pg_class_tuple);
12461282

1283+
/*
1284+
* If an invalidation arrived mid-build, start over. Between here and the
1285+
* end of this function, don't add code that does or reasonably could read
1286+
* system catalogs. That range must be free from invalidation processing
1287+
* for the !insertIt case. For the insertIt case, RelationCacheInsert()
1288+
* will enroll this relation in ordinary relcache invalidation processing,
1289+
*/
1290+
if (in_progress_list[in_progress_offset].invalidated)
1291+
{
1292+
RelationDestroyRelation(relation, false);
1293+
gotoretry;
1294+
}
1295+
Assert(in_progress_offset+1==in_progress_list_len);
1296+
in_progress_list_len--;
1297+
12471298
/*
12481299
* Insert newly created relation into relcache hash table, if requested.
12491300
*
@@ -2586,6 +2637,14 @@ RelationClearRelation(Relation relation, bool rebuild)
25862637

25872638
/* Build temporary entry, but don't link it into hashtable */
25882639
newrel=RelationBuildDesc(save_relid, false);
2640+
2641+
/*
2642+
* Between here and the end of the swap, don't add code that does or
2643+
* reasonably could read system catalogs. That range must be free
2644+
* from invalidation processing. See RelationBuildDesc() manipulation
2645+
* of in_progress_list.
2646+
*/
2647+
25892648
if (newrel==NULL)
25902649
{
25912650
/*
@@ -2816,6 +2875,14 @@ RelationCacheInvalidateEntry(Oid relationId)
28162875
relcacheInvalsReceived++;
28172876
RelationFlushRelation(relation);
28182877
}
2878+
else
2879+
{
2880+
inti;
2881+
2882+
for (i=0;i<in_progress_list_len;i++)
2883+
if (in_progress_list[i].reloid==relationId)
2884+
in_progress_list[i].invalidated= true;
2885+
}
28192886
}
28202887

28212888
/*
@@ -2824,11 +2891,11 @@ RelationCacheInvalidateEntry(Oid relationId)
28242891
* and rebuild those with positive reference counts. Also reset the smgr
28252892
* relation cache and re-read relation mapping data.
28262893
*
2827-
*Thisis currently used only to recover from SI message buffer overflow,
2828-
* so we do not touch relations having new-in-transaction relfilenodes; they
2829-
* cannot be targets of cross-backend SI updates (and our own updates now go
2830-
*through a separate linked list that isn't limited by the SI message
2831-
* buffer size).
2894+
*Apart from debug_discard_caches, thisis currently used only to recover
2895+
*from SI message buffer overflow,so we do not touch relations having
2896+
*new-in-transaction relfilenodes; theycannot be targets of cross-backend
2897+
*SI updates (and our own updates now go through a separate linked list
2898+
*that isn't limited by the SI messagebuffer size).
28322899
*
28332900
* We do this in two phases: the first pass deletes deletable items, and
28342901
* the second one rebuilds the rebuildable items. This is essential for
@@ -2846,16 +2913,22 @@ RelationCacheInvalidateEntry(Oid relationId)
28462913
* second pass processes nailed-in-cache items before other nondeletable
28472914
* items. This should ensure that system catalogs are up to date before
28482915
* we attempt to use them to reload information about other open relations.
2916+
*
2917+
* After those two phases of work having immediate effects, we normally
2918+
* signal any RelationBuildDesc() on the stack to start over. However, we
2919+
* don't do this if called as part of debug_discard_caches. Otherwise,
2920+
* RelationBuildDesc() would become an infinite loop.
28492921
*/
28502922
void
2851-
RelationCacheInvalidate(void)
2923+
RelationCacheInvalidate(booldebug_discard)
28522924
{
28532925
HASH_SEQ_STATUSstatus;
28542926
RelIdCacheEnt*idhentry;
28552927
Relationrelation;
28562928
List*rebuildFirstList=NIL;
28572929
List*rebuildList=NIL;
28582930
ListCell*l;
2931+
inti;
28592932

28602933
/*
28612934
* Reload relation mapping data before starting to reconstruct cache.
@@ -2942,6 +3015,11 @@ RelationCacheInvalidate(void)
29423015
RelationClearRelation(relation, true);
29433016
}
29443017
list_free(rebuildList);
3018+
3019+
if (!debug_discard)
3020+
/* Any RelationBuildDesc() on the stack must start over. */
3021+
for (i=0;i<in_progress_list_len;i++)
3022+
in_progress_list[i].invalidated= true;
29453023
}
29463024

29473025
/*
@@ -3092,6 +3170,13 @@ AtEOXact_RelationCache(bool isCommit)
30923170
RelIdCacheEnt*idhentry;
30933171
inti;
30943172

3173+
/*
3174+
* Forget in_progress_list. This is relevant when we're aborting due to
3175+
* an error during RelationBuildDesc().
3176+
*/
3177+
Assert(in_progress_list_len==0|| !isCommit);
3178+
in_progress_list_len=0;
3179+
30953180
/*
30963181
* Unless the eoxact_list[] overflowed, we only need to examine the rels
30973182
* listed in it. Otherwise fall back on a hash_seq_search scan.
@@ -3238,6 +3323,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
32383323
RelIdCacheEnt*idhentry;
32393324
inti;
32403325

3326+
/*
3327+
* Forget in_progress_list. This is relevant when we're aborting due to
3328+
* an error during RelationBuildDesc(). We don't commit subtransactions
3329+
* during RelationBuildDesc().
3330+
*/
3331+
Assert(in_progress_list_len==0|| !isCommit);
3332+
in_progress_list_len=0;
3333+
32413334
/*
32423335
* Unless the eoxact_list[] overflowed, we only need to examine the rels
32433336
* listed in it. Otherwise fall back on a hash_seq_search scan. Same
@@ -3786,6 +3879,7 @@ void
37863879
RelationCacheInitialize(void)
37873880
{
37883881
HASHCTLctl;
3882+
intallocsize;
37893883

37903884
/*
37913885
* make sure cache memory context exists
@@ -3802,6 +3896,15 @@ RelationCacheInitialize(void)
38023896
RelationIdCache=hash_create("Relcache by OID",INITRELCACHESIZE,
38033897
&ctl,HASH_ELEM |HASH_BLOBS);
38043898

3899+
/*
3900+
* reserve enough in_progress_list slots for many cases
3901+
*/
3902+
allocsize=4;
3903+
in_progress_list=
3904+
MemoryContextAlloc(CacheMemoryContext,
3905+
allocsize*sizeof(*in_progress_list));
3906+
in_progress_list_maxlen=allocsize;
3907+
38053908
/*
38063909
* relation mapper needs to be initialized too
38073910
*/

‎src/include/utils/inval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
6161
externvoidCallSyscacheCallbacks(intcacheid,uint32hashvalue);
6262

6363
externvoidInvalidateSystemCaches(void);
64+
externvoidInvalidateSystemCachesExtended(booldebug_discard);
6465
#endif/* INVAL_H */

‎src/include/utils/relcache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ extern void RelationForgetRelation(Oid rid);
120120

121121
externvoidRelationCacheInvalidateEntry(OidrelationId);
122122

123-
externvoidRelationCacheInvalidate(void);
123+
externvoidRelationCacheInvalidate(booldebug_discard);
124124

125125
externvoidRelationCloseSmgrByOid(OidrelationId);
126126

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp