Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2ac3ef7

Browse files
committed
Fix tuple routing in cases where tuple descriptors don't match.
The previous coding failed to work correctly when we have amulti-level partitioned hierarchy where tables at successive levelshave different attribute numbers for the partition key attributes. Tofix, have each PartitionDispatch object store a standaloneTupleTableSlot initialized with the TupleDesc of the correspondingpartitioned table, along with a TupleConversionMap to map tuples fromthe its parent's rowtype to own rowtype. After tuple routing choosesa leaf partition, we must use the leaf partition's tuple descriptor,not the root table's. To that end, a dedicated TupleTableSlot fortuple routing is now allocated in EState.Amit Langote
1 parent12bd7dd commit2ac3ef7

File tree

7 files changed

+190
-15
lines changed

7 files changed

+190
-15
lines changed

‎src/backend/catalog/partition.c

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -923,13 +923,19 @@ RelationGetPartitionQual(Relation rel, bool recurse)
923923
returngenerate_partition_qual(rel,recurse);
924924
}
925925

926-
/* Turn an array of OIDs with N elements into a list */
927-
#defineOID_ARRAY_TO_LIST(arr,N,list) \
926+
/*
927+
* Append OIDs of rel's partitions to the list 'partoids' and for each OID,
928+
* append pointer rel to the list 'parents'.
929+
*/
930+
#defineAPPEND_REL_PARTITION_OIDS(rel,partoids,parents) \
928931
do\
929932
{\
930933
inti;\
931-
for (i = 0; i < (N); i++)\
932-
(list) = lappend_oid((list), (arr)[i]);\
934+
for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
935+
{\
936+
(partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
937+
(parents) = lappend((parents), (rel));\
938+
}\
933939
} while(0)
934940

935941
/*
@@ -944,11 +950,13 @@ PartitionDispatch *
944950
RelationGetPartitionDispatchInfo(Relationrel,intlockmode,
945951
int*num_parted,List**leaf_part_oids)
946952
{
947-
PartitionDescrootpartdesc=RelationGetPartitionDesc(rel);
948953
PartitionDispatchData**pd;
949954
List*all_parts=NIL,
950-
*parted_rels;
951-
ListCell*lc;
955+
*all_parents=NIL,
956+
*parted_rels,
957+
*parted_rel_parents;
958+
ListCell*lc1,
959+
*lc2;
952960
inti,
953961
k,
954962
offset;
@@ -965,10 +973,13 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
965973
*/
966974
*num_parted=1;
967975
parted_rels=list_make1(rel);
968-
OID_ARRAY_TO_LIST(rootpartdesc->oids,rootpartdesc->nparts,all_parts);
969-
foreach(lc,all_parts)
976+
/* Root partitioned table has no parent, so NULL for parent */
977+
parted_rel_parents=list_make1(NULL);
978+
APPEND_REL_PARTITION_OIDS(rel,all_parts,all_parents);
979+
forboth(lc1,all_parts,lc2,all_parents)
970980
{
971-
Relationpartrel=heap_open(lfirst_oid(lc),lockmode);
981+
Relationpartrel=heap_open(lfirst_oid(lc1),lockmode);
982+
Relationparent=lfirst(lc2);
972983
PartitionDescpartdesc=RelationGetPartitionDesc(partrel);
973984

974985
/*
@@ -979,7 +990,8 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
979990
{
980991
(*num_parted)++;
981992
parted_rels=lappend(parted_rels,partrel);
982-
OID_ARRAY_TO_LIST(partdesc->oids,partdesc->nparts,all_parts);
993+
parted_rel_parents=lappend(parted_rel_parents,parent);
994+
APPEND_REL_PARTITION_OIDS(partrel,all_parts,all_parents);
983995
}
984996
else
985997
heap_close(partrel,NoLock);
@@ -1004,10 +1016,12 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
10041016
sizeof(PartitionDispatchData*));
10051017
*leaf_part_oids=NIL;
10061018
i=k=offset=0;
1007-
foreach(lc,parted_rels)
1019+
forboth(lc1,parted_rels,lc2,parted_rel_parents)
10081020
{
1009-
Relationpartrel=lfirst(lc);
1021+
Relationpartrel=lfirst(lc1);
1022+
Relationparent=lfirst(lc2);
10101023
PartitionKeypartkey=RelationGetPartitionKey(partrel);
1024+
TupleDesctupdesc=RelationGetDescr(partrel);
10111025
PartitionDescpartdesc=RelationGetPartitionDesc(partrel);
10121026
intj,
10131027
m;
@@ -1017,6 +1031,27 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
10171031
pd[i]->key=partkey;
10181032
pd[i]->keystate=NIL;
10191033
pd[i]->partdesc=partdesc;
1034+
if (parent!=NULL)
1035+
{
1036+
/*
1037+
* For every partitioned table other than root, we must store
1038+
* a tuple table slot initialized with its tuple descriptor and
1039+
* a tuple conversion map to convert a tuple from its parent's
1040+
* rowtype to its own. That is to make sure that we are looking
1041+
* at the correct row using the correct tuple descriptor when
1042+
* computing its partition key for tuple routing.
1043+
*/
1044+
pd[i]->tupslot=MakeSingleTupleTableSlot(tupdesc);
1045+
pd[i]->tupmap=convert_tuples_by_name(RelationGetDescr(parent),
1046+
tupdesc,
1047+
gettext_noop("could not convert row type"));
1048+
}
1049+
else
1050+
{
1051+
/* Not required for the root partitioned table */
1052+
pd[i]->tupslot=NULL;
1053+
pd[i]->tupmap=NULL;
1054+
}
10201055
pd[i]->indexes= (int*)palloc(partdesc->nparts*sizeof(int));
10211056

10221057
/*
@@ -1610,6 +1645,8 @@ get_partition_for_tuple(PartitionDispatch *pd,
16101645
{
16111646
PartitionKeykey=parent->key;
16121647
PartitionDescpartdesc=parent->partdesc;
1648+
TupleTableSlot*myslot=parent->tupslot;
1649+
TupleConversionMap*map=parent->tupmap;
16131650

16141651
/* Quick exit */
16151652
if (partdesc->nparts==0)
@@ -1618,6 +1655,17 @@ get_partition_for_tuple(PartitionDispatch *pd,
16181655
return-1;
16191656
}
16201657

1658+
if (myslot!=NULL)
1659+
{
1660+
HeapTupletuple=ExecFetchSlotTuple(slot);
1661+
1662+
ExecClearTuple(myslot);
1663+
Assert(map!=NULL);
1664+
tuple=do_convert_tuple(tuple,map);
1665+
ExecStoreTuple(tuple,myslot,InvalidBuffer, true);
1666+
slot=myslot;
1667+
}
1668+
16211669
/* Extract partition key from tuple */
16221670
FormPartitionKeyDatum(parent,slot,estate,values,isnull);
16231671

‎src/backend/commands/copy.c

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2435,6 +2435,15 @@ CopyFrom(CopyState cstate)
24352435
/* Triggers might need a slot as well */
24362436
estate->es_trig_tuple_slot=ExecInitExtraTupleSlot(estate);
24372437

2438+
/*
2439+
* Initialize a dedicated slot to manipulate tuples of any given
2440+
* partition's rowtype.
2441+
*/
2442+
if (cstate->partition_dispatch_info)
2443+
estate->es_partition_tuple_slot=ExecInitExtraTupleSlot(estate);
2444+
else
2445+
estate->es_partition_tuple_slot=NULL;
2446+
24382447
/*
24392448
* It's more efficient to prepare a bunch of tuples for insertion, and
24402449
* insert them in one heap_multi_insert() call, than call heap_insert()
@@ -2484,7 +2493,8 @@ CopyFrom(CopyState cstate)
24842493

24852494
for (;;)
24862495
{
2487-
TupleTableSlot*slot;
2496+
TupleTableSlot*slot,
2497+
*oldslot=NULL;
24882498
boolskip_tuple;
24892499
Oidloaded_oid=InvalidOid;
24902500

@@ -2571,7 +2581,19 @@ CopyFrom(CopyState cstate)
25712581
map=cstate->partition_tupconv_maps[leaf_part_index];
25722582
if (map)
25732583
{
2584+
Relationpartrel=resultRelInfo->ri_RelationDesc;
2585+
25742586
tuple=do_convert_tuple(tuple,map);
2587+
2588+
/*
2589+
* We must use the partition's tuple descriptor from this
2590+
* point on. Use a dedicated slot from this point on until
2591+
* we're finished dealing with the partition.
2592+
*/
2593+
oldslot=slot;
2594+
slot=estate->es_partition_tuple_slot;
2595+
Assert(slot!=NULL);
2596+
ExecSetSlotDescriptor(slot,RelationGetDescr(partrel));
25752597
ExecStoreTuple(tuple,slot,InvalidBuffer, true);
25762598
}
25772599

@@ -2667,6 +2689,10 @@ CopyFrom(CopyState cstate)
26672689
{
26682690
resultRelInfo=saved_resultRelInfo;
26692691
estate->es_result_relation_info=resultRelInfo;
2692+
2693+
/* Switch back to the slot corresponding to the root table */
2694+
Assert(oldslot!=NULL);
2695+
slot=oldslot;
26702696
}
26712697
}
26722698
}
@@ -2714,13 +2740,14 @@ CopyFrom(CopyState cstate)
27142740
* Remember cstate->partition_dispatch_info[0] corresponds to the root
27152741
* partitioned table, which we must not try to close, because it is
27162742
* the main target table of COPY that will be closed eventually by
2717-
* DoCopy().
2743+
* DoCopy(). Also, tupslot is NULL for the root partitioned table.
27182744
*/
27192745
for (i=1;i<cstate->num_dispatch;i++)
27202746
{
27212747
PartitionDispatchpd=cstate->partition_dispatch_info[i];
27222748

27232749
heap_close(pd->reldesc,NoLock);
2750+
ExecDropSingleTupleTableSlot(pd->tupslot);
27242751
}
27252752
for (i=0;i<cstate->num_partitions;i++)
27262753
{

‎src/backend/executor/nodeModifyTable.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ ExecInsert(ModifyTableState *mtstate,
262262
RelationresultRelationDesc;
263263
OidnewId;
264264
List*recheckIndexes=NIL;
265+
TupleTableSlot*oldslot=NULL;
265266

266267
/*
267268
* get the heap tuple out of the tuple table slot, making sure we have a
@@ -318,7 +319,19 @@ ExecInsert(ModifyTableState *mtstate,
318319
map=mtstate->mt_partition_tupconv_maps[leaf_part_index];
319320
if (map)
320321
{
322+
Relationpartrel=resultRelInfo->ri_RelationDesc;
323+
321324
tuple=do_convert_tuple(tuple,map);
325+
326+
/*
327+
* We must use the partition's tuple descriptor from this
328+
* point on, until we're finished dealing with the partition.
329+
* Use the dedicated slot for that.
330+
*/
331+
oldslot=slot;
332+
slot=estate->es_partition_tuple_slot;
333+
Assert(slot!=NULL);
334+
ExecSetSlotDescriptor(slot,RelationGetDescr(partrel));
322335
ExecStoreTuple(tuple,slot,InvalidBuffer, true);
323336
}
324337
}
@@ -566,6 +579,10 @@ ExecInsert(ModifyTableState *mtstate,
566579
{
567580
resultRelInfo=saved_resultRelInfo;
568581
estate->es_result_relation_info=resultRelInfo;
582+
583+
/* Switch back to the slot corresponding to the root table */
584+
Assert(oldslot!=NULL);
585+
slot=oldslot;
569586
}
570587

571588
/*
@@ -1734,7 +1751,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
17341751
mtstate->mt_partitions=partitions;
17351752
mtstate->mt_num_partitions=num_partitions;
17361753
mtstate->mt_partition_tupconv_maps=partition_tupconv_maps;
1754+
1755+
/*
1756+
* Initialize a dedicated slot to manipulate tuples of any given
1757+
* partition's rowtype.
1758+
*/
1759+
estate->es_partition_tuple_slot=ExecInitExtraTupleSlot(estate);
17371760
}
1761+
else
1762+
estate->es_partition_tuple_slot=NULL;
17381763

17391764
/*
17401765
* Initialize any WITH CHECK OPTION constraints if needed.
@@ -2058,12 +2083,14 @@ ExecEndModifyTable(ModifyTableState *node)
20582083
* Remember node->mt_partition_dispatch_info[0] corresponds to the root
20592084
* partitioned table, which we must not try to close, because it is the
20602085
* main target table of the query that will be closed by ExecEndPlan().
2086+
* Also, tupslot is NULL for the root partitioned table.
20612087
*/
20622088
for (i=1;i<node->mt_num_dispatch;i++)
20632089
{
20642090
PartitionDispatchpd=node->mt_partition_dispatch_info[i];
20652091

20662092
heap_close(pd->reldesc,NoLock);
2093+
ExecDropSingleTupleTableSlot(pd->tupslot);
20672094
}
20682095
for (i=0;i<node->mt_num_partitions;i++)
20692096
{

‎src/include/catalog/partition.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ typedef struct PartitionDescData *PartitionDesc;
4747
*keyPartition key information of the table
4848
*keystateExecution state required for expressions in the partition key
4949
*partdescPartition descriptor of the table
50+
*tupslotA standalone TupleTableSlot initialized with this table's tuple
51+
*descriptor
52+
*tupmapTupleConversionMap to convert from the parent's rowtype to
53+
*this table's rowtype (when extracting the partition key of a
54+
*tuple just before routing it through this table)
5055
*indexesArray with partdesc->nparts members (for details on what
5156
*individual members represent, see how they are set in
5257
*RelationGetPartitionDispatchInfo())
@@ -58,6 +63,8 @@ typedef struct PartitionDispatchData
5863
PartitionKeykey;
5964
List*keystate;/* list of ExprState */
6065
PartitionDescpartdesc;
66+
TupleTableSlot*tupslot;
67+
TupleConversionMap*tupmap;
6168
int*indexes;
6269
}PartitionDispatchData;
6370

‎src/include/nodes/execnodes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,9 @@ typedef struct EState
384384
TupleTableSlot*es_trig_oldtup_slot;/* for TriggerEnabled */
385385
TupleTableSlot*es_trig_newtup_slot;/* for TriggerEnabled */
386386

387+
/* Slot used to manipulate a tuple after it is routed to a partition */
388+
TupleTableSlot*es_partition_tuple_slot;
389+
387390
/* Parameter info: */
388391
ParamListInfoes_param_list_info;/* values of external params */
389392
ParamExecData*es_param_exec_vals;/* values of internal params */

‎src/test/regress/expected/insert.out

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,40 @@ drop cascades to table part_null
300300
drop cascades to table part_ee_ff
301301
drop cascades to table part_ee_ff1
302302
drop cascades to table part_ee_ff2
303+
-- more tests for certain multi-level partitioning scenarios
304+
create table p (a int, b int) partition by range (a, b);
305+
create table p1 (b int, a int not null) partition by range (b);
306+
create table p11 (like p1);
307+
alter table p11 drop a;
308+
alter table p11 add a int;
309+
alter table p11 drop a;
310+
alter table p11 add a int not null;
311+
-- attnum for key attribute 'a' is different in p, p1, and p11
312+
select attrelid::regclass, attname, attnum
313+
from pg_attribute
314+
where attname = 'a'
315+
and (attrelid = 'p'::regclass
316+
or attrelid = 'p1'::regclass
317+
or attrelid = 'p11'::regclass);
318+
attrelid | attname | attnum
319+
----------+---------+--------
320+
p | a | 1
321+
p1 | a | 2
322+
p11 | a | 4
323+
(3 rows)
324+
325+
alter table p1 attach partition p11 for values from (2) to (5);
326+
alter table p attach partition p1 for values from (1, 2) to (1, 10);
327+
-- check that "(1, 2)" is correctly routed to p11.
328+
insert into p values (1, 2);
329+
select tableoid::regclass, * from p;
330+
tableoid | a | b
331+
----------+---+---
332+
p11 | 1 | 2
333+
(1 row)
334+
335+
-- cleanup
336+
drop table p cascade;
337+
NOTICE: drop cascades to 2 other objects
338+
DETAIL: drop cascades to table p1
339+
drop cascades to table p11

‎src/test/regress/sql/insert.sql

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,29 @@ select tableoid::regclass, * from list_parted;
170170
-- cleanup
171171
droptable range_parted cascade;
172172
droptable list_parted cascade;
173+
174+
-- more tests for certain multi-level partitioning scenarios
175+
createtablep (aint, bint) partition by range (a, b);
176+
createtablep1 (bint, aintnot null) partition by range (b);
177+
createtablep11 (like p1);
178+
altertable p11 drop a;
179+
altertable p11 add aint;
180+
altertable p11 drop a;
181+
altertable p11 add aintnot null;
182+
-- attnum for key attribute 'a' is different in p, p1, and p11
183+
select attrelid::regclass, attname, attnum
184+
from pg_attribute
185+
where attname='a'
186+
and (attrelid='p'::regclass
187+
or attrelid='p1'::regclass
188+
or attrelid='p11'::regclass);
189+
190+
altertable p1 attach partition p11 forvaluesfrom (2) to (5);
191+
altertable p attach partition p1 forvaluesfrom (1,2) to (1,10);
192+
193+
-- check that "(1, 2)" is correctly routed to p11.
194+
insert into pvalues (1,2);
195+
select tableoid::regclass,*from p;
196+
197+
-- cleanup
198+
droptable p cascade;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp