Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite428699

Browse files
committed
Avoid race in RelationBuildDesc() affecting CREATE INDEX CONCURRENTLY.
CIC and REINDEX CONCURRENTLY assume backends see their catalog changesno later than each backend's next transaction start. That failed tohold when a backend absorbed a relevant invalidation in the middle ofrunning RelationBuildDesc() on the CIC index. Queries that use theresulting index can silently fail to find rows. Fix this for futureindex builds by making RelationBuildDesc() loop until it finisheswithout accepting a relevant invalidation. It may be necessary toreindex to recover from past occurrences; REINDEX CONCURRENTLY suffices.Back-patch to 9.6 (all supported versions).Noah Misch and Andrey Borodin, reviewed (in earlier versions) by AndresFreund.Discussion:https://postgr.es/m/20210730022548.GA1940096@gust.leadboat.com
1 parentb1df061 commite428699

File tree

8 files changed

+408
-4
lines changed

8 files changed

+408
-4
lines changed

‎src/backend/utils/cache/inval.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -631,12 +631,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
631631
*/
632632
void
633633
InvalidateSystemCaches(void)
634+
{
635+
InvalidateSystemCachesExtended(false);
636+
}
637+
638+
void
639+
InvalidateSystemCachesExtended(booldebug_discard)
634640
{
635641
inti;
636642

637643
InvalidateCatalogSnapshot();
638644
ResetCatalogCaches();
639-
RelationCacheInvalidate();/* gets smgr and relmap too */
645+
RelationCacheInvalidate(debug_discard);/* gets smgr and relmap too */
640646

641647
for (i=0;i<syscache_callback_count;i++)
642648
{
@@ -707,7 +713,7 @@ AcceptInvalidationMessages(void)
707713
if (recursion_depth<3)
708714
{
709715
recursion_depth++;
710-
InvalidateSystemCaches();
716+
InvalidateSystemCachesExtended(true);
711717
recursion_depth--;
712718
}
713719
}

‎src/backend/utils/cache/relcache.c

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,24 @@ boolcriticalSharedRelcachesBuilt = false;
135135
*/
136136
staticlongrelcacheInvalsReceived=0L;
137137

138+
/*
139+
* in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
140+
* INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
141+
* It critically relies on each backend absorbing those changes no later than
142+
* next transaction start. Hence, RelationBuildDesc() loops until it finishes
143+
* without accepting a relevant invalidation. (Most invalidation consumers
144+
* don't do this.)
145+
*/
146+
typedefstructinprogressent
147+
{
148+
Oidreloid;/* OID of relation being built */
149+
boolinvalidated;/* whether an invalidation arrived for it */
150+
}InProgressEnt;
151+
152+
staticInProgressEnt*in_progress_list;
153+
staticintin_progress_list_len;
154+
staticintin_progress_list_maxlen;
155+
138156
/*
139157
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
140158
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -938,11 +956,27 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
938956
staticRelation
939957
RelationBuildDesc(OidtargetRelId,boolinsertIt)
940958
{
959+
intin_progress_offset;
941960
Relationrelation;
942961
Oidrelid;
943962
HeapTuplepg_class_tuple;
944963
Form_pg_classrelp;
945964

965+
/* Register to catch invalidation messages */
966+
if (in_progress_list_len >=in_progress_list_maxlen)
967+
{
968+
intallocsize;
969+
970+
allocsize=in_progress_list_maxlen*2;
971+
in_progress_list=repalloc(in_progress_list,
972+
allocsize*sizeof(*in_progress_list));
973+
in_progress_list_maxlen=allocsize;
974+
}
975+
in_progress_offset=in_progress_list_len++;
976+
in_progress_list[in_progress_offset].reloid=targetRelId;
977+
retry:
978+
in_progress_list[in_progress_offset].invalidated= false;
979+
946980
/*
947981
* find the tuple in pg_class corresponding to the given relation id
948982
*/
@@ -952,7 +986,11 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
952986
* if no such tuple exists, return NULL
953987
*/
954988
if (!HeapTupleIsValid(pg_class_tuple))
989+
{
990+
Assert(in_progress_offset+1==in_progress_list_len);
991+
in_progress_list_len--;
955992
returnNULL;
993+
}
956994

957995
/*
958996
* get information from the pg_class_tuple
@@ -1078,6 +1116,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
10781116
*/
10791117
heap_freetuple(pg_class_tuple);
10801118

1119+
/*
1120+
* If an invalidation arrived mid-build, start over. Between here and the
1121+
* end of this function, don't add code that does or reasonably could read
1122+
* system catalogs. That range must be free from invalidation processing
1123+
* for the !insertIt case. For the insertIt case, RelationCacheInsert()
1124+
* will enroll this relation in ordinary relcache invalidation processing,
1125+
*/
1126+
if (in_progress_list[in_progress_offset].invalidated)
1127+
{
1128+
RelationDestroyRelation(relation, false);
1129+
gotoretry;
1130+
}
1131+
Assert(in_progress_offset+1==in_progress_list_len);
1132+
in_progress_list_len--;
1133+
10811134
/*
10821135
* Insert newly created relation into relcache hash table, if requested.
10831136
*
@@ -2282,6 +2335,14 @@ RelationClearRelation(Relation relation, bool rebuild)
22822335

22832336
/* Build temporary entry, but don't link it into hashtable */
22842337
newrel=RelationBuildDesc(save_relid, false);
2338+
2339+
/*
2340+
* Between here and the end of the swap, don't add code that does or
2341+
* reasonably could read system catalogs. That range must be free
2342+
* from invalidation processing. See RelationBuildDesc() manipulation
2343+
* of in_progress_list.
2344+
*/
2345+
22852346
if (newrel==NULL)
22862347
{
22872348
/*
@@ -2457,6 +2518,14 @@ RelationCacheInvalidateEntry(Oid relationId)
24572518
relcacheInvalsReceived++;
24582519
RelationFlushRelation(relation);
24592520
}
2521+
else
2522+
{
2523+
inti;
2524+
2525+
for (i=0;i<in_progress_list_len;i++)
2526+
if (in_progress_list[i].reloid==relationId)
2527+
in_progress_list[i].invalidated= true;
2528+
}
24602529
}
24612530

24622531
/*
@@ -2488,16 +2557,22 @@ RelationCacheInvalidateEntry(Oid relationId)
24882557
* second pass processes nailed-in-cache items before other nondeletable
24892558
* items. This should ensure that system catalogs are up to date before
24902559
* we attempt to use them to reload information about other open relations.
2560+
*
2561+
* After those two phases of work having immediate effects, we normally
2562+
* signal any RelationBuildDesc() on the stack to start over. However, we
2563+
* don't do this if called as part of debug_discard_caches. Otherwise,
2564+
* RelationBuildDesc() would become an infinite loop.
24912565
*/
24922566
void
2493-
RelationCacheInvalidate(void)
2567+
RelationCacheInvalidate(booldebug_discard)
24942568
{
24952569
HASH_SEQ_STATUSstatus;
24962570
RelIdCacheEnt*idhentry;
24972571
Relationrelation;
24982572
List*rebuildFirstList=NIL;
24992573
List*rebuildList=NIL;
25002574
ListCell*l;
2575+
inti;
25012576

25022577
/*
25032578
* Reload relation mapping data before starting to reconstruct cache.
@@ -2584,6 +2659,11 @@ RelationCacheInvalidate(void)
25842659
RelationClearRelation(relation, true);
25852660
}
25862661
list_free(rebuildList);
2662+
2663+
if (!debug_discard)
2664+
/* Any RelationBuildDesc() on the stack must start over. */
2665+
for (i=0;i<in_progress_list_len;i++)
2666+
in_progress_list[i].invalidated= true;
25872667
}
25882668

25892669
/*
@@ -2656,6 +2736,13 @@ AtEOXact_RelationCache(bool isCommit)
26562736
RelIdCacheEnt*idhentry;
26572737
inti;
26582738

2739+
/*
2740+
* Forget in_progress_list. This is relevant when we're aborting due to
2741+
* an error during RelationBuildDesc().
2742+
*/
2743+
Assert(in_progress_list_len==0|| !isCommit);
2744+
in_progress_list_len=0;
2745+
26592746
/*
26602747
* Unless the eoxact_list[] overflowed, we only need to examine the rels
26612748
* listed in it. Otherwise fall back on a hash_seq_search scan.
@@ -2804,6 +2891,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
28042891
RelIdCacheEnt*idhentry;
28052892
inti;
28062893

2894+
/*
2895+
* Forget in_progress_list. This is relevant when we're aborting due to
2896+
* an error during RelationBuildDesc(). We don't commit subtransactions
2897+
* during RelationBuildDesc().
2898+
*/
2899+
Assert(in_progress_list_len==0|| !isCommit);
2900+
in_progress_list_len=0;
2901+
28072902
/*
28082903
* Unless the eoxact_list[] overflowed, we only need to examine the rels
28092904
* listed in it. Otherwise fall back on a hash_seq_search scan. Same
@@ -3287,6 +3382,7 @@ void
32873382
RelationCacheInitialize(void)
32883383
{
32893384
HASHCTLctl;
3385+
intallocsize;
32903386

32913387
/*
32923388
* make sure cache memory context exists
@@ -3303,6 +3399,15 @@ RelationCacheInitialize(void)
33033399
RelationIdCache=hash_create("Relcache by OID",INITRELCACHESIZE,
33043400
&ctl,HASH_ELEM |HASH_BLOBS);
33053401

3402+
/*
3403+
* reserve enough in_progress_list slots for many cases
3404+
*/
3405+
allocsize=4;
3406+
in_progress_list=
3407+
MemoryContextAlloc(CacheMemoryContext,
3408+
allocsize*sizeof(*in_progress_list));
3409+
in_progress_list_maxlen=allocsize;
3410+
33063411
/*
33073412
* relation mapper needs to be initialized too
33083413
*/

‎src/bin/pgbench/t/022_cic.pl

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::Moretests=> 4;
13+
14+
my ($node,$result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = get_new_node('CIC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf','lock_timeout = 180000');
22+
$node->start;
23+
$node->safe_psql('postgres',q(CREATE TABLE tbl(i int)));
24+
$node->safe_psql('postgres',q(CREATE INDEX idx ON tbl(i)));
25+
$node->safe_psql(
26+
'postgres',q(
27+
CREATE FUNCTION heapallindexed() RETURNS void AS $$
28+
DECLARE
29+
count_seqscan int;
30+
count_idxscan int;
31+
BEGIN
32+
count_seqscan := (SELECT count(*) FROM tbl);
33+
SET enable_seqscan = off;
34+
count_idxscan := (SELECT count(*) FROM tbl);
35+
RESET enable_seqscan;
36+
IF count_seqscan <> count_idxscan THEN
37+
RAISE 'seqscan found % rows, but idxscan found % rows',
38+
count_seqscan, count_idxscan;
39+
END IF;
40+
END
41+
$$ LANGUAGE plpgsql;
42+
));
43+
44+
#
45+
# Stress CIC with pgbench
46+
#
47+
48+
# Run background pgbench with CIC. We cannot mix-in this script into single
49+
# pgbench: CIC will deadlock with itself occasionally.
50+
my$pgbench_out ='';
51+
my$pgbench_timer = IPC::Run::timeout(180);
52+
my$pgbench_h =$node->background_pgbench(
53+
'--no-vacuum --client=1 --transactions=200',
54+
{
55+
'002_pgbench_concurrent_cic'=>q(
56+
DROP INDEX CONCURRENTLY idx;
57+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
58+
BEGIN ISOLATION LEVEL REPEATABLE READ;
59+
SELECT heapallindexed();
60+
ROLLBACK;
61+
)
62+
},
63+
\$pgbench_out,
64+
$pgbench_timer);
65+
66+
# Run pgbench.
67+
$node->pgbench(
68+
'--no-vacuum --client=5 --transactions=200',
69+
0,
70+
[qr{actually processed}],
71+
[qr{^$}],
72+
'concurrent INSERTs',
73+
{
74+
'002_pgbench_concurrent_transaction'=>q(
75+
BEGIN;
76+
INSERT INTO tbl VALUES(0);
77+
COMMIT;
78+
),
79+
'002_pgbench_concurrent_transaction_savepoints'=>q(
80+
BEGIN;
81+
SAVEPOINT s1;
82+
INSERT INTO tbl VALUES(0);
83+
COMMIT;
84+
)
85+
});
86+
87+
$pgbench_h->pump_nb;
88+
$pgbench_h->finish();
89+
unlike($pgbench_out,qr/aborted in/,"pgbench with CIC works");
90+
91+
# done
92+
$node->stop;
93+
done_testing();

‎src/include/utils/inval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
6161
externvoidCallSyscacheCallbacks(intcacheid,uint32hashvalue);
6262

6363
externvoidInvalidateSystemCaches(void);
64+
externvoidInvalidateSystemCachesExtended(booldebug_discard);
6465
#endif/* INVAL_H */

‎src/include/utils/relcache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ extern void RelationForgetRelation(Oid rid);
107107

108108
externvoidRelationCacheInvalidateEntry(OidrelationId);
109109

110-
externvoidRelationCacheInvalidate(void);
110+
externvoidRelationCacheInvalidate(booldebug_discard);
111111

112112
externvoidRelationCloseSmgrByOid(OidrelationId);
113113

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp