Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f481d28

Browse files
committed
Check default partitions constraints while descending
Partitioning tuple route code assumes that the partition chosen while descending the partition hierarchy is always the correct one. This is true except when the partition is the default partition and another partition has been added concurrently: the partition constraint changes and we don't recheck it. This can lead to tuples mistakenly being added to the default partition that should have been rejected.

Fix by rechecking the default partition constraint while descending the hierarchy.

An isolation test based on the reproduction steps described by Hao Wu (with tweaks for extra coverage) is included.

Backpatch to 12, where this bug came in with 898e5e3.

Reported by: Hao Wu <hawu@vmware.com>
Author: Amit Langote <amitlangote09@gmail.com>
Author: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/CA+HiwqFqBmcSSap4sFnCBUEL_VfOMmEKaQ3gwUhyfa4c7J_-nA@mail.gmail.com
Discussion: https://postgr.es/m/DM5PR0501MB3910E97A9EDFB4C775CF3D75A42F0@DM5PR0501MB3910.namprd05.prod.outlook.com
1 parent c9ae5cb commit f481d28

File tree

4 files changed

+195
-25
lines changed

4 files changed

+195
-25
lines changed

‎src/backend/executor/execPartition.c

Lines changed: 102 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@
5151
*PartitionDispatchData->indexes for details on how this array is
5252
*indexed.
5353
*
54+
* nonleaf_partitions
55+
*Array of 'max_dispatch' elements containing pointers to fake
56+
*ResultRelInfo objects for nonleaf partitions, useful for checking
57+
*the partition constraint.
58+
*
5459
* num_dispatch
5560
*The current number of items stored in the 'partition_dispatch_info'
5661
*array. Also serves as the index of the next free array element for
@@ -89,6 +94,7 @@ struct PartitionTupleRouting
8994
{
9095
Relationpartition_root;
9196
PartitionDispatch*partition_dispatch_info;
97+
ResultRelInfo**nonleaf_partitions;
9298
intnum_dispatch;
9399
intmax_dispatch;
94100
ResultRelInfo**partitions;
@@ -280,9 +286,11 @@ ExecFindPartition(ModifyTableState *mtstate,
280286
PartitionDispatchdispatch;
281287
PartitionDescpartdesc;
282288
ExprContext*ecxt=GetPerTupleExprContext(estate);
283-
TupleTableSlot*ecxt_scantuple_old=ecxt->ecxt_scantuple;
289+
TupleTableSlot*ecxt_scantuple_saved=ecxt->ecxt_scantuple;
290+
TupleTableSlot*rootslot=slot;
284291
TupleTableSlot*myslot=NULL;
285292
MemoryContextoldcxt;
293+
ResultRelInfo*rri=NULL;
286294

287295
/* use per-tuple context here to avoid leaking memory */
288296
oldcxt=MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -296,27 +304,15 @@ ExecFindPartition(ModifyTableState *mtstate,
296304

297305
/* start with the root partitioned table */
298306
dispatch=pd[0];
299-
while (true)
307+
while (dispatch!=NULL)
300308
{
301-
AttrMap*map=dispatch->tupmap;
302309
intpartidx=-1;
303310

304311
CHECK_FOR_INTERRUPTS();
305312

306313
rel=dispatch->reldesc;
307314
partdesc=dispatch->partdesc;
308315

309-
/*
310-
* Convert the tuple to this parent's layout, if different from the
311-
* current relation.
312-
*/
313-
myslot=dispatch->tupslot;
314-
if (myslot!=NULL)
315-
{
316-
Assert(map!=NULL);
317-
slot=execute_attr_map_slot(map,slot,myslot);
318-
}
319-
320316
/*
321317
* Extract partition key from tuple. Expression evaluation machinery
322318
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
@@ -352,11 +348,9 @@ ExecFindPartition(ModifyTableState *mtstate,
352348

353349
if (partdesc->is_leaf[partidx])
354350
{
355-
ResultRelInfo*rri;
356-
357351
/*
358-
* Look to see if we've already got a ResultRelInfo for this
359-
* partition.
352+
* We've reached the leaf -- hurray, we're done. Look to see if
353+
* we've already got a ResultRelInfo for this partition.
360354
*/
361355
if (likely(dispatch->indexes[partidx] >=0))
362356
{
@@ -400,14 +394,10 @@ ExecFindPartition(ModifyTableState *mtstate,
400394
dispatch,
401395
rootResultRelInfo,partidx);
402396
}
397+
Assert(rri!=NULL);
403398

404-
/* Release the tuple in the lowest parent's dedicated slot. */
405-
if (slot==myslot)
406-
ExecClearTuple(myslot);
407-
408-
MemoryContextSwitchTo(oldcxt);
409-
ecxt->ecxt_scantuple=ecxt_scantuple_old;
410-
returnrri;
399+
/* Signal to terminate the loop */
400+
dispatch=NULL;
411401
}
412402
else
413403
{
@@ -419,6 +409,8 @@ ExecFindPartition(ModifyTableState *mtstate,
419409
/* Already built. */
420410
Assert(dispatch->indexes[partidx]<proute->num_dispatch);
421411

412+
rri=proute->nonleaf_partitions[dispatch->indexes[partidx]];
413+
422414
/*
423415
* Move down to the next partition level and search again
424416
* until we find a leaf partition that matches this tuple
@@ -440,10 +432,75 @@ ExecFindPartition(ModifyTableState *mtstate,
440432
dispatch,partidx);
441433
Assert(dispatch->indexes[partidx] >=0&&
442434
dispatch->indexes[partidx]<proute->num_dispatch);
435+
436+
rri=proute->nonleaf_partitions[dispatch->indexes[partidx]];
443437
dispatch=subdispatch;
444438
}
439+
440+
/*
441+
* Convert the tuple to the new parent's layout, if different from
442+
* the previous parent.
443+
*/
444+
if (dispatch->tupslot)
445+
{
446+
AttrMap*map=dispatch->tupmap;
447+
TupleTableSlot*tempslot=myslot;
448+
449+
myslot=dispatch->tupslot;
450+
slot=execute_attr_map_slot(map,slot,myslot);
451+
452+
if (tempslot!=NULL)
453+
ExecClearTuple(tempslot);
454+
}
455+
}
456+
457+
/*
458+
* If this partition is the default one, we must check its partition
459+
* constraint now, which may have changed concurrently due to
460+
* partitions being added to the parent.
461+
*
462+
* (We do this here, and do not rely on ExecInsert doing it, because
463+
* we don't want to miss doing it for non-leaf partitions.)
464+
*/
465+
if (partidx==partdesc->boundinfo->default_index)
466+
{
467+
PartitionRoutingInfo*partrouteinfo=rri->ri_PartitionInfo;
468+
469+
/*
470+
* The tuple must match the partition's layout for the constraint
471+
* expression to be evaluated successfully. If the partition is
472+
* sub-partitioned, that would already be the case due to the code
473+
* above, but for a leaf partition the tuple still matches the
474+
* parent's layout.
475+
*
476+
* Note that we have a map to convert from root to current
477+
* partition, but not from immediate parent to current partition.
478+
* So if we have to convert, do it from the root slot; if not, use
479+
* the root slot as-is.
480+
*/
481+
if (partrouteinfo)
482+
{
483+
TupleConversionMap*map=partrouteinfo->pi_RootToPartitionMap;
484+
485+
if (map)
486+
slot=execute_attr_map_slot(map->attrMap,rootslot,
487+
partrouteinfo->pi_PartitionTupleSlot);
488+
else
489+
slot=rootslot;
490+
}
491+
492+
ExecPartitionCheck(rri,slot,estate, true);
445493
}
446494
}
495+
496+
/* Release the tuple in the lowest parent's dedicated slot. */
497+
if (myslot!=NULL)
498+
ExecClearTuple(myslot);
499+
/* and restore ecxt's scantuple */
500+
ecxt->ecxt_scantuple=ecxt_scantuple_saved;
501+
MemoryContextSwitchTo(oldcxt);
502+
503+
returnrri;
447504
}
448505

449506
/*
@@ -1060,17 +1117,37 @@ ExecInitPartitionDispatchInfo(EState *estate,
10601117
proute->max_dispatch=4;
10611118
proute->partition_dispatch_info= (PartitionDispatch*)
10621119
palloc(sizeof(PartitionDispatch)*proute->max_dispatch);
1120+
proute->nonleaf_partitions= (ResultRelInfo**)
1121+
palloc(sizeof(ResultRelInfo*)*proute->max_dispatch);
10631122
}
10641123
else
10651124
{
10661125
proute->max_dispatch *=2;
10671126
proute->partition_dispatch_info= (PartitionDispatch*)
10681127
repalloc(proute->partition_dispatch_info,
10691128
sizeof(PartitionDispatch)*proute->max_dispatch);
1129+
proute->nonleaf_partitions= (ResultRelInfo**)
1130+
repalloc(proute->nonleaf_partitions,
1131+
sizeof(ResultRelInfo*)*proute->max_dispatch);
10701132
}
10711133
}
10721134
proute->partition_dispatch_info[dispatchidx]=pd;
10731135

1136+
/*
1137+
* If setting up a PartitionDispatch for a sub-partitioned table, we may
1138+
* also need a minimally valid ResultRelInfo for checking the partition
1139+
* constraint later; set that up now.
1140+
*/
1141+
if (parent_pd)
1142+
{
1143+
ResultRelInfo*rri=makeNode(ResultRelInfo);
1144+
1145+
InitResultRelInfo(rri,rel,1,proute->partition_root,0);
1146+
proute->nonleaf_partitions[dispatchidx]=rri;
1147+
}
1148+
else
1149+
proute->nonleaf_partitions[dispatchidx]=NULL;
1150+
10741151
/*
10751152
* Finally, if setting up a PartitionDispatch for a sub-partitioned table,
10761153
* install a downlink in the parent to allow quick descent.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
Parsed test spec with 2 sessions
2+
3+
starting permutation: s1b s1a s2b s2i s1c s2c s2s
4+
step s1b: begin;
5+
step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
6+
step s2b: begin;
7+
step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
8+
step s1c: commit;
9+
step s2i: <... completed>
10+
error in steps s1c s2i: ERROR: new row for relation "tpart_default" violates partition constraint
11+
step s2c: commit;
12+
step s2s: select tableoid::regclass, * from tpart;
13+
tableoid i j
14+
15+
tpart_2 110 xxx
16+
tpart_2 120 yyy
17+
tpart_2 150 zzz
18+
19+
starting permutation: s1b s1a s2b s2i2 s1c s2c s2s
20+
step s1b: begin;
21+
step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
22+
step s2b: begin;
23+
step s2i2: insert into tpart_default (i, j) values (110, 'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
24+
step s1c: commit;
25+
step s2i2: <... completed>
26+
error in steps s1c s2i2: ERROR: new row for relation "tpart_default" violates partition constraint
27+
step s2c: commit;
28+
step s2s: select tableoid::regclass, * from tpart;
29+
tableoid i j
30+
31+
tpart_2 110 xxx
32+
tpart_2 120 yyy
33+
tpart_2 150 zzz
34+
35+
starting permutation: s1b s2b s2i s1a s2c s1c s2s
36+
step s1b: begin;
37+
step s2b: begin;
38+
step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz');
39+
step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200); <waiting ...>
40+
step s2c: commit;
41+
step s1a: <... completed>
42+
error in steps s2c s1a: ERROR: updated partition constraint for default partition "tpart_default_default" would be violated by some row
43+
step s1c: commit;
44+
step s2s: select tableoid::regclass, * from tpart;
45+
tableoid i j
46+
47+
tpart_default_default110 xxx
48+
tpart_default_default120 yyy
49+
tpart_default_default150 zzz

‎src/test/isolation/isolation_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ test: vacuum-skip-locked
8181
test: predicate-hash
8282
test: predicate-gist
8383
test: predicate-gin
84+
test: partition-concurrent-attach
8485
test: partition-key-update-1
8586
test: partition-key-update-2
8687
test: partition-key-update-3
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Verify that default partition constraint is enforced correctly
2+
# in light of partitions being added concurrently to its parent
3+
setup {
4+
droptableifexiststpart;
5+
createtabletpart(iint,jtext)partitionbyrange(i);
6+
createtabletpart_1(liketpart);
7+
createtabletpart_2(liketpart);
8+
createtabletpart_default (aint,jtext,iint)partitionbylist (j);
9+
createtabletpart_default_default (aint,iint,bint,jtext);
10+
altertabletpart_default_defaultdropb;
11+
altertabletpart_defaultattachpartitiontpart_default_defaultdefault;
12+
altertabletpart_defaultdropa;
13+
altertabletpartattachpartitiontpart_defaultdefault;
14+
altertabletpartattachpartitiontpart_1forvaluesfrom(0)to (100);
15+
insertintotpart_2values (110,'xxx'), (120,'yyy'), (150,'zzz');
16+
}
17+
18+
session"s1"
19+
step"s1b"{begin; }
20+
step"s1a"{altertabletpartattachpartitiontpart_2forvaluesfrom (100)to (200); }
21+
step"s1c"{commit; }
22+
23+
session"s2"
24+
step"s2b"{begin; }
25+
step"s2i"{insertintotpartvalues (110,'xxx'), (120,'yyy'), (150,'zzz'); }
26+
step"s2i2"{insertintotpart_default (i,j)values (110,'xxx'), (120,'yyy'), (150,'zzz'); }
27+
step"s2c"{commit; }
28+
step"s2s"{selecttableoid::regclass,*fromtpart; }
29+
30+
teardown{droptabletpart; }
31+
32+
# insert into tpart by s2 which routes to tpart_default due to not seeing
33+
# concurrently added tpart_2 should fail, because the partition constraint
34+
# of tpart_default would have changed due to tpart_2 having been added
35+
permutation"s1b""s1a""s2b""s2i""s1c""s2c""s2s"
36+
37+
# similar to above, but now insert into sub-partitioned tpart_default
38+
permutation"s1b""s1a""s2b""s2i2""s1c""s2c""s2s"
39+
40+
# reverse: now the insert into tpart_default by s2 occurs first followed by
41+
# attach in s1, which should fail when it scans the leaf default partition
42+
# and finds the violating rows
43+
permutation"s1b""s2b""s2i""s1a""s2c""s1c""s2s"

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp