Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7b6ec86

Browse files
committed
Fix risk of deadlock failure while dropping a partitioned index.
DROP INDEX needs to lock the index's table before the index itself,else it will deadlock against ordinary queries that acquire therelation locks in that order. This is correctly mechanized forplain indexes by RangeVarCallbackForDropRelation; but in the case ofa partitioned index, we neglected to lock the child tables in advanceof locking the child indexes. We can fix that by traversing theinheritance tree and acquiring the needed locks in RemoveRelations,after we have acquired our locks on the parent partitioned table andindex.While at it, do some refactoring to eliminate confusion betweenthe actual and expected relkind in RangeVarCallbackForDropRelation.We can save a couple of syscache lookups too, by having that functionpass back info that RemoveRelations will need.Back-patch to v11 where partitioned indexes were added.Jimmy Yih, Gaurab Dey, Tom LaneDiscussion:https://postgr.es/m/BYAPR05MB645402330042E17D91A70C12BD5F9@BYAPR05MB6454.namprd05.prod.outlook.com
1 parent1f8bc44 commit7b6ec86

File tree

4 files changed

+191
-18
lines changed

4 files changed

+191
-18
lines changed

‎src/backend/commands/tablecmds.c

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -295,12 +295,18 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
295295
{'\0', 0, NULL, NULL, NULL, NULL}
296296
};
297297

298+
/* communication between RemoveRelations and RangeVarCallbackForDropRelation */
298299
struct DropRelationCallbackState
299300
{
300-
charrelkind;
301+
/* These fields are set by RemoveRelations: */
302+
charexpected_relkind;
303+
LOCKMODEheap_lockmode;
304+
/* These fields are state to track which subsidiary locks are held: */
301305
OidheapOid;
302306
OidpartParentOid;
303-
boolconcurrent;
307+
/* These fields are passed back by RangeVarCallbackForDropRelation: */
308+
charactual_relkind;
309+
charactual_relpersistence;
304310
};
305311

306312
/* Alter table target-type flags for ATSimplePermissions */
@@ -1416,10 +1422,13 @@ RemoveRelations(DropStmt *drop)
14161422
AcceptInvalidationMessages();
14171423

14181424
/* Look up the appropriate relation using namespace search. */
1419-
state.relkind = relkind;
1425+
state.expected_relkind = relkind;
1426+
state.heap_lockmode = drop->concurrent ?
1427+
ShareUpdateExclusiveLock : AccessExclusiveLock;
1428+
/* We must initialize these fields to show that no locks are held: */
14201429
state.heapOid = InvalidOid;
14211430
state.partParentOid = InvalidOid;
1422-
state.concurrent = drop->concurrent;
1431+
14231432
relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK,
14241433
RangeVarCallbackForDropRelation,
14251434
(void *) &state);
@@ -1433,10 +1442,10 @@ RemoveRelations(DropStmt *drop)
14331442

14341443
/*
14351444
* Decide if concurrent mode needs to be used here or not. The
1436-
*relation persistence cannot be known without its OID.
1445+
*callback retrieved the rel's persistence for us.
14371446
*/
14381447
if (drop->concurrent &&
1439-
get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
1448+
state.actual_relpersistence != RELPERSISTENCE_TEMP)
14401449
{
14411450
Assert(list_length(drop->objects) == 1 &&
14421451
drop->removeType == OBJECT_INDEX);
@@ -1448,12 +1457,24 @@ RemoveRelations(DropStmt *drop)
14481457
* either.
14491458
*/
14501459
if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 &&
1451-
get_rel_relkind(relOid) == RELKIND_PARTITIONED_INDEX)
1460+
state.actual_relkind == RELKIND_PARTITIONED_INDEX)
14521461
ereport(ERROR,
14531462
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
14541463
errmsg("cannot drop partitioned index \"%s\" concurrently",
14551464
rel->relname)));
14561465

1466+
/*
1467+
* If we're told to drop a partitioned index, we must acquire lock on
1468+
* all the children of its parent partitioned table before proceeding.
1469+
* Otherwise we'd try to lock the child index partitions before their
1470+
* tables, leading to potential deadlock against other sessions that
1471+
* will lock those objects in the other order.
1472+
*/
1473+
if (state.actual_relkind == RELKIND_PARTITIONED_INDEX)
1474+
(void) find_all_inheritors(state.heapOid,
1475+
state.heap_lockmode,
1476+
NULL);
1477+
14571478
/* OK, we're ready to delete this one */
14581479
obj.classId = RelationRelationId;
14591480
obj.objectId = relOid;
@@ -1479,17 +1500,14 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
14791500
{
14801501
HeapTupletuple;
14811502
struct DropRelationCallbackState *state;
1482-
charrelkind;
14831503
charexpected_relkind;
14841504
boolis_partition;
14851505
Form_pg_class classform;
14861506
LOCKMODEheap_lockmode;
14871507
boolinvalid_system_index = false;
14881508

14891509
state = (struct DropRelationCallbackState *) arg;
1490-
relkind = state->relkind;
1491-
heap_lockmode = state->concurrent ?
1492-
ShareUpdateExclusiveLock : AccessExclusiveLock;
1510+
heap_lockmode = state->heap_lockmode;
14931511

14941512
/*
14951513
* If we previously locked some other index's heap, and the name we're
@@ -1523,6 +1541,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15231541
classform = (Form_pg_class) GETSTRUCT(tuple);
15241542
is_partition = classform->relispartition;
15251543

1544+
/* Pass back some data to save lookups in RemoveRelations */
1545+
state->actual_relkind = classform->relkind;
1546+
state->actual_relpersistence = classform->relpersistence;
1547+
15261548
/*
15271549
* Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
15281550
* but RemoveRelations() can only pass one relkind for a given relation.
@@ -1538,13 +1560,15 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15381560
else
15391561
expected_relkind = classform->relkind;
15401562

1541-
if (relkind != expected_relkind)
1542-
DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
1563+
if (state->expected_relkind != expected_relkind)
1564+
DropErrorMsgWrongType(rel->relname, classform->relkind,
1565+
state->expected_relkind);
15431566

15441567
/* Allow DROP to either table owner or schema owner */
15451568
if (!pg_class_ownercheck(relOid, GetUserId()) &&
15461569
!pg_namespace_ownercheck(classform->relnamespace, GetUserId()))
1547-
aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
1570+
aclcheck_error(ACLCHECK_NOT_OWNER,
1571+
get_relkind_objtype(classform->relkind),
15481572
rel->relname);
15491573

15501574
/*
@@ -1553,7 +1577,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15531577
* only concerns indexes of toast relations that became invalid during a
15541578
* REINDEX CONCURRENTLY process.
15551579
*/
1556-
if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX)
1580+
if (IsSystemClass(relOid, classform) &&classform->relkind == RELKIND_INDEX)
15571581
{
15581582
HeapTuplelocTuple;
15591583
Form_pg_index indexform;
@@ -1589,9 +1613,10 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
15891613
* locking the index. index_drop() will need this anyway, and since
15901614
* regular queries lock tables before their indexes, we risk deadlock if
15911615
* we do it the other way around. No error if we don't find a pg_index
1592-
* entry, though --- the relation may have been dropped.
1616+
* entry, though --- the relation may have been dropped. Note that this
1617+
* code will execute for either plain or partitioned indexes.
15931618
*/
1594-
if ((relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) &&
1619+
if (expected_relkind == RELKIND_INDEX &&
15951620
relOid != oldRelOid)
15961621
{
15971622
state->heapOid = IndexGetRelation(relOid, true);
@@ -1602,7 +1627,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
16021627
/*
16031628
* Similarly, if the relation is a partition, we must acquire lock on its
16041629
* parent before locking the partition. That's because queries lock the
1605-
* parent before its partitions, so we risk deadlockit we do it the other
1630+
* parent before its partitions, so we risk deadlockif we do it the other
16061631
* way around.
16071632
*/
16081633
if (is_partition && relOid != oldRelOid)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
Parsed test spec with 3 sessions
2+
3+
starting permutation: s1begin s1lock s2begin s2drop s1select s3getlocks s1commit s3getlocks s2commit
4+
step s1begin: BEGIN;
5+
step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE;
6+
step s2begin: BEGIN;
7+
step s2drop: DROP INDEX part_drop_index_locking_idx; <waiting ...>
8+
step s1select: SELECT * FROM part_drop_index_locking_subpart_child;
9+
id
10+
--
11+
(0 rows)
12+
13+
step s3getlocks:
14+
SELECT s.query, c.relname, l.mode, l.granted
15+
FROM pg_locks l
16+
JOIN pg_class c ON l.relation = c.oid
17+
JOIN pg_stat_activity s ON l.pid = s.pid
18+
WHERE c.relname LIKE 'part_drop_index_locking%'
19+
ORDER BY s.query, c.relname, l.mode, l.granted;
20+
21+
query |relname |mode |granted
22+
----------------------------------------------------+---------------------------------------------+-------------------+-------
23+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking |AccessExclusiveLock|t
24+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_idx |AccessExclusiveLock|t
25+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t
26+
DROP INDEX part_drop_index_locking_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f
27+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t
28+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t
29+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t
30+
(7 rows)
31+
32+
step s1commit: COMMIT;
33+
step s2drop: <... completed>
34+
step s3getlocks:
35+
SELECT s.query, c.relname, l.mode, l.granted
36+
FROM pg_locks l
37+
JOIN pg_class c ON l.relation = c.oid
38+
JOIN pg_stat_activity s ON l.pid = s.pid
39+
WHERE c.relname LIKE 'part_drop_index_locking%'
40+
ORDER BY s.query, c.relname, l.mode, l.granted;
41+
42+
query |relname |mode |granted
43+
---------------------------------------+--------------------------------------------+-------------------+-------
44+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking |AccessExclusiveLock|t
45+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_idx |AccessExclusiveLock|t
46+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t
47+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t
48+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_child_id_idx|AccessExclusiveLock|t
49+
DROP INDEX part_drop_index_locking_idx;|part_drop_index_locking_subpart_id_idx |AccessExclusiveLock|t
50+
(6 rows)
51+
52+
step s2commit: COMMIT;
53+
54+
starting permutation: s1begin s1lock s2begin s2dropsub s1select s3getlocks s1commit s3getlocks s2commit
55+
step s1begin: BEGIN;
56+
step s1lock: LOCK TABLE part_drop_index_locking_subpart_child IN ACCESS SHARE MODE;
57+
step s2begin: BEGIN;
58+
step s2dropsub: DROP INDEX part_drop_index_locking_subpart_idx; <waiting ...>
59+
step s1select: SELECT * FROM part_drop_index_locking_subpart_child;
60+
id
61+
--
62+
(0 rows)
63+
64+
step s3getlocks:
65+
SELECT s.query, c.relname, l.mode, l.granted
66+
FROM pg_locks l
67+
JOIN pg_class c ON l.relation = c.oid
68+
JOIN pg_stat_activity s ON l.pid = s.pid
69+
WHERE c.relname LIKE 'part_drop_index_locking%'
70+
ORDER BY s.query, c.relname, l.mode, l.granted;
71+
72+
query |relname |mode |granted
73+
----------------------------------------------------+---------------------------------------------+-------------------+-------
74+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart |AccessExclusiveLock|t
75+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_child |AccessExclusiveLock|f
76+
DROP INDEX part_drop_index_locking_subpart_idx; |part_drop_index_locking_subpart_idx |AccessExclusiveLock|t
77+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child |AccessShareLock |t
78+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx |AccessShareLock |t
79+
SELECT * FROM part_drop_index_locking_subpart_child;|part_drop_index_locking_subpart_child_id_idx1|AccessShareLock |t
80+
(6 rows)
81+
82+
step s1commit: COMMIT;
83+
step s2dropsub: <... completed>
84+
step s3getlocks:
85+
SELECT s.query, c.relname, l.mode, l.granted
86+
FROM pg_locks l
87+
JOIN pg_class c ON l.relation = c.oid
88+
JOIN pg_stat_activity s ON l.pid = s.pid
89+
WHERE c.relname LIKE 'part_drop_index_locking%'
90+
ORDER BY s.query, c.relname, l.mode, l.granted;
91+
92+
query |relname |mode |granted
93+
-----------------------------------------------+---------------------------------------------+-------------------+-------
94+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart |AccessExclusiveLock|t
95+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child |AccessExclusiveLock|t
96+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_child_id_idx1|AccessExclusiveLock|t
97+
DROP INDEX part_drop_index_locking_subpart_idx;|part_drop_index_locking_subpart_idx |AccessExclusiveLock|t
98+
(4 rows)
99+
100+
step s2commit: COMMIT;

‎src/test/isolation/isolation_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ test: predicate-hash
9090
test: predicate-gist
9191
test: predicate-gin
9292
test: partition-concurrent-attach
93+
test: partition-drop-index-locking
9394
test: partition-key-update-1
9495
test: partition-key-update-2
9596
test: partition-key-update-3
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Verify that DROP INDEX properly locks all downward sub-partitions
2+
# and partitions before locking the indexes.
3+
4+
setup
5+
{
6+
CREATETABLEpart_drop_index_locking (idint)PARTITIONBYRANGE(id);
7+
CREATETABLEpart_drop_index_locking_subpartPARTITIONOFpart_drop_index_lockingFORVALUESFROM (1)TO (100)PARTITIONBYRANGE(id);
8+
CREATETABLEpart_drop_index_locking_subpart_childPARTITIONOFpart_drop_index_locking_subpartFORVALUESFROM (1)TO (100);
9+
CREATEINDEXpart_drop_index_locking_idxONpart_drop_index_locking(id);
10+
CREATEINDEXpart_drop_index_locking_subpart_idxONpart_drop_index_locking_subpart(id);
11+
}
12+
13+
teardown
14+
{
15+
DROPTABLEpart_drop_index_locking;
16+
}
17+
18+
# SELECT will take AccessShare lock first on the table and then on its index.
19+
# We can simulate the case where DROP INDEX starts between those steps
20+
# by manually taking the table lock beforehand.
21+
sessions1
22+
steps1begin {BEGIN; }
23+
steps1lock {LOCKTABLEpart_drop_index_locking_subpart_childINACCESSSHAREMODE; }
24+
steps1select {SELECT*FROMpart_drop_index_locking_subpart_child; }
25+
steps1commit {COMMIT; }
26+
27+
sessions2
28+
steps2begin {BEGIN; }
29+
steps2drop {DROPINDEXpart_drop_index_locking_idx; }
30+
steps2dropsub {DROPINDEXpart_drop_index_locking_subpart_idx; }
31+
steps2commit {COMMIT; }
32+
33+
sessions3
34+
steps3getlocks {
35+
SELECTs.query,c.relname,l.mode,l.granted
36+
FROMpg_locksl
37+
JOINpg_classcONl.relation=c.oid
38+
JOINpg_stat_activitysONl.pid=s.pid
39+
WHEREc.relnameLIKE'part_drop_index_locking%'
40+
ORDERBYs.query,c.relname,l.mode,l.granted;
41+
}
42+
43+
# Run DROP INDEX on top partitioned table
44+
permutations1begins1locks2begins2drop(s1commit)s1selects3getlockss1commits3getlockss2commit
45+
46+
# Run DROP INDEX on top sub-partition table
47+
permutations1begins1locks2begins2dropsub(s1commit)s1selects3getlockss1commits3getlockss2commit

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp