Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit77b6b5e

Browse files
committed
Make RelationGetPartitionDispatchInfo expand depth-first.
With this change, the order of leaf partitions as returned byRelationGetPartitionDispatchInfo should now be the same as theorder used by expand_inherited_rtentry. This will make it simplerfor future patches to match up the partition dispatch informationwith the planner data structures. The new code is also, in myopinion anyway, simpler and easier to understand.Amit Langote, reviewed by Amit Khandekar. I also reviewed andmade a few cosmetic revisions.Discussion:http://postgr.es/m/d98d4761-5071-1762-501e-0e15047c714b@lab.ntt.co.jp
1 parent8951c65 commit77b6b5e

File tree

2 files changed

+116
-143
lines changed

2 files changed

+116
-143
lines changed

‎src/backend/catalog/partition.c

Lines changed: 109 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ static int32 partition_bound_cmp(PartitionKey key,
147147
staticintpartition_bound_bsearch(PartitionKeykey,
148148
PartitionBoundInfoboundinfo,
149149
void*probe,boolprobe_is_bound,bool*is_equal);
150+
staticvoidget_partition_dispatch_recurse(Relationrel,Relationparent,
151+
List**pds,List**leaf_part_oids);
150152

151153
/*
152154
* RelationBuildPartitionDesc
@@ -1191,21 +1193,6 @@ get_partition_qual_relid(Oid relid)
11911193
returnresult;
11921194
}
11931195

1194-
/*
1195-
* Append OIDs of rel's partitions to the list 'partoids' and for each OID,
1196-
* append pointer rel to the list 'parents'.
1197-
*/
1198-
#defineAPPEND_REL_PARTITION_OIDS(rel,partoids,parents) \
1199-
do\
1200-
{\
1201-
inti;\
1202-
for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
1203-
{\
1204-
(partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
1205-
(parents) = lappend((parents), (rel));\
1206-
}\
1207-
} while(0)
1208-
12091196
/*
12101197
* RelationGetPartitionDispatchInfo
12111198
*Returns information necessary to route tuples down a partition tree
@@ -1222,151 +1209,130 @@ PartitionDispatch *
12221209
RelationGetPartitionDispatchInfo(Relationrel,
12231210
int*num_parted,List**leaf_part_oids)
12241211
{
1212+
List*pdlist=NIL;
12251213
PartitionDispatchData**pd;
1226-
List*all_parts=NIL,
1227-
*all_parents=NIL,
1228-
*parted_rels,
1229-
*parted_rel_parents;
1230-
ListCell*lc1,
1231-
*lc2;
1232-
inti,
1233-
k,
1234-
offset;
1214+
ListCell*lc;
1215+
inti;
12351216

1236-
/*
1237-
* We rely on the relcache to traverse the partition tree to build both
1238-
* the leaf partition OIDs list and the array of PartitionDispatch objects
1239-
* for the partitioned tables in the tree. That means every partitioned
1240-
* table in the tree must be locked, which is fine since we require the
1241-
* caller to lock all the partitions anyway.
1242-
*
1243-
* For every partitioned table in the tree, starting with the root
1244-
* partitioned table, add its relcache entry to parted_rels, while also
1245-
* queuing its partitions (in the order in which they appear in the
1246-
* partition descriptor) to be looked at later in the same loop. This is
1247-
* a bit tricky but works because the foreach() macro doesn't fetch the
1248-
* next list element until the bottom of the loop.
1249-
*/
1250-
*num_parted=1;
1251-
parted_rels=list_make1(rel);
1252-
/* Root partitioned table has no parent, so NULL for parent */
1253-
parted_rel_parents=list_make1(NULL);
1254-
APPEND_REL_PARTITION_OIDS(rel,all_parts,all_parents);
1255-
forboth(lc1,all_parts,lc2,all_parents)
1217+
Assert(rel->rd_rel->relkind==RELKIND_PARTITIONED_TABLE);
1218+
1219+
*num_parted=0;
1220+
*leaf_part_oids=NIL;
1221+
1222+
get_partition_dispatch_recurse(rel,NULL,&pdlist,leaf_part_oids);
1223+
*num_parted=list_length(pdlist);
1224+
pd= (PartitionDispatchData**)palloc(*num_parted*
1225+
sizeof(PartitionDispatchData*));
1226+
i=0;
1227+
foreach(lc,pdlist)
12561228
{
1257-
Oidpartrelid=lfirst_oid(lc1);
1258-
Relationparent=lfirst(lc2);
1229+
pd[i++]=lfirst(lc);
1230+
}
12591231

1260-
if (get_rel_relkind(partrelid)==RELKIND_PARTITIONED_TABLE)
1261-
{
1262-
/*
1263-
* Already locked by the caller. Note that it is the
1264-
* responsibility of the caller to close the below relcache entry,
1265-
* once done using the information being collected here (for
1266-
* example, in ExecEndModifyTable).
1267-
*/
1268-
Relationpartrel=heap_open(partrelid,NoLock);
1232+
returnpd;
1233+
}
12691234

1270-
(*num_parted)++;
1271-
parted_rels=lappend(parted_rels,partrel);
1272-
parted_rel_parents=lappend(parted_rel_parents,parent);
1273-
APPEND_REL_PARTITION_OIDS(partrel,all_parts,all_parents);
1274-
}
1235+
/*
1236+
* get_partition_dispatch_recurse
1237+
*Recursively expand partition tree rooted at rel
1238+
*
1239+
* As the partition tree is expanded in a depth-first manner, we mantain two
1240+
* global lists: of PartitionDispatch objects corresponding to partitioned
1241+
* tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
1242+
*
1243+
* Note that the order of OIDs of leaf partitions in leaf_part_oids matches
1244+
* the order in which the planner's expand_partitioned_rtentry() processes
1245+
* them. It's not necessarily the case that the offsets match up exactly,
1246+
* because constraint exclusion might prune away some partitions on the
1247+
* planner side, whereas we'll always have the complete list; but unpruned
1248+
* partitions will appear in the same order in the plan as they are returned
1249+
* here.
1250+
*/
1251+
staticvoid
1252+
get_partition_dispatch_recurse(Relationrel,Relationparent,
1253+
List**pds,List**leaf_part_oids)
1254+
{
1255+
TupleDesctupdesc=RelationGetDescr(rel);
1256+
PartitionDescpartdesc=RelationGetPartitionDesc(rel);
1257+
PartitionKeypartkey=RelationGetPartitionKey(rel);
1258+
PartitionDispatchpd;
1259+
inti;
1260+
1261+
check_stack_depth();
1262+
1263+
/* Build a PartitionDispatch for this table and add it to *pds. */
1264+
pd= (PartitionDispatch)palloc(sizeof(PartitionDispatchData));
1265+
*pds=lappend(*pds,pd);
1266+
pd->reldesc=rel;
1267+
pd->key=partkey;
1268+
pd->keystate=NIL;
1269+
pd->partdesc=partdesc;
1270+
if (parent!=NULL)
1271+
{
1272+
/*
1273+
* For every partitioned table other than the root, we must store a
1274+
* tuple table slot initialized with its tuple descriptor and a tuple
1275+
* conversion map to convert a tuple from its parent's rowtype to its
1276+
* own. That is to make sure that we are looking at the correct row
1277+
* using the correct tuple descriptor when computing its partition key
1278+
* for tuple routing.
1279+
*/
1280+
pd->tupslot=MakeSingleTupleTableSlot(tupdesc);
1281+
pd->tupmap=convert_tuples_by_name(RelationGetDescr(parent),
1282+
tupdesc,
1283+
gettext_noop("could not convert row type"));
1284+
}
1285+
else
1286+
{
1287+
/* Not required for the root partitioned table */
1288+
pd->tupslot=NULL;
1289+
pd->tupmap=NULL;
12751290
}
12761291

12771292
/*
1278-
* We want to create two arrays - one for leaf partitions and another for
1279-
* partitioned tables (including the root table and internal partitions).
1280-
* While we only create the latter here, leaf partition array of suitable
1281-
* objects (such as, ResultRelInfo) is created by the caller using the
1282-
* list of OIDs we return. Indexes into these arrays get assigned in a
1283-
* breadth-first manner, whereby partitions of any given level are placed
1284-
* consecutively in the respective arrays.
1293+
* Go look at each partition of this table. If it's a leaf partition,
1294+
* simply add its OID to *leaf_part_oids. If it's a partitioned table,
1295+
* recursively call get_partition_dispatch_recurse(), so that its
1296+
* partitions are processed as well and a corresponding PartitionDispatch
1297+
* object gets added to *pds.
1298+
*
1299+
* About the values in pd->indexes: for a leaf partition, it contains the
1300+
* leaf partition's position in the global list *leaf_part_oids minus 1,
1301+
* whereas for a partitioned table partition, it contains the partition's
1302+
* position in the global list *pds multiplied by -1. The latter is
1303+
* multiplied by -1 to distinguish partitioned tables from leaf partitions
1304+
* when going through the values in pd->indexes. So, for example, when
1305+
* using it during tuple-routing, encountering a value >= 0 means we found
1306+
* a leaf partition. It is immediately returned as the index in the array
1307+
* of ResultRelInfos of all the leaf partitions, using which we insert the
1308+
* tuple into that leaf partition. A negative value means we found a
1309+
* partitioned table. The value multiplied by -1 is returned as the index
1310+
* in the array of PartitionDispatch objects of all partitioned tables in
1311+
* the tree. This value is used to continue the search in the next level
1312+
* of the partition tree.
12851313
*/
1286-
pd= (PartitionDispatchData**)palloc(*num_parted*
1287-
sizeof(PartitionDispatchData*));
1288-
*leaf_part_oids=NIL;
1289-
i=k=offset=0;
1290-
forboth(lc1,parted_rels,lc2,parted_rel_parents)
1314+
pd->indexes= (int*)palloc(partdesc->nparts*sizeof(int));
1315+
for (i=0;i<partdesc->nparts;i++)
12911316
{
1292-
Relationpartrel=lfirst(lc1);
1293-
Relationparent=lfirst(lc2);
1294-
PartitionKeypartkey=RelationGetPartitionKey(partrel);
1295-
TupleDesctupdesc=RelationGetDescr(partrel);
1296-
PartitionDescpartdesc=RelationGetPartitionDesc(partrel);
1297-
intj,
1298-
m;
1299-
1300-
pd[i]= (PartitionDispatch)palloc(sizeof(PartitionDispatchData));
1301-
pd[i]->reldesc=partrel;
1302-
pd[i]->key=partkey;
1303-
pd[i]->keystate=NIL;
1304-
pd[i]->partdesc=partdesc;
1305-
if (parent!=NULL)
1317+
Oidpartrelid=partdesc->oids[i];
1318+
1319+
if (get_rel_relkind(partrelid)!=RELKIND_PARTITIONED_TABLE)
13061320
{
1307-
/*
1308-
* For every partitioned table other than root, we must store a
1309-
* tuple table slot initialized with its tuple descriptor and a
1310-
* tuple conversion map to convert a tuple from its parent's
1311-
* rowtype to its own. That is to make sure that we are looking at
1312-
* the correct row using the correct tuple descriptor when
1313-
* computing its partition key for tuple routing.
1314-
*/
1315-
pd[i]->tupslot=MakeSingleTupleTableSlot(tupdesc);
1316-
pd[i]->tupmap=convert_tuples_by_name(RelationGetDescr(parent),
1317-
tupdesc,
1318-
gettext_noop("could not convert row type"));
1321+
*leaf_part_oids=lappend_oid(*leaf_part_oids,partrelid);
1322+
pd->indexes[i]=list_length(*leaf_part_oids)-1;
13191323
}
13201324
else
13211325
{
1322-
/* Not required for the root partitioned table */
1323-
pd[i]->tupslot=NULL;
1324-
pd[i]->tupmap=NULL;
1325-
}
1326-
pd[i]->indexes= (int*)palloc(partdesc->nparts*sizeof(int));
1327-
1328-
/*
1329-
* Indexes corresponding to the internal partitions are multiplied by
1330-
* -1 to distinguish them from those of leaf partitions. Encountering
1331-
* an index >= 0 means we found a leaf partition, which is immediately
1332-
* returned as the partition we are looking for. A negative index
1333-
* means we found a partitioned table, whose PartitionDispatch object
1334-
* is located at the above index multiplied back by -1. Using the
1335-
* PartitionDispatch object, search is continued further down the
1336-
* partition tree.
1337-
*/
1338-
m=0;
1339-
for (j=0;j<partdesc->nparts;j++)
1340-
{
1341-
Oidpartrelid=partdesc->oids[j];
1326+
/*
1327+
* We assume all tables in the partition tree were already locked
1328+
* by the caller.
1329+
*/
1330+
Relationpartrel=heap_open(partrelid,NoLock);
13421331

1343-
if (get_rel_relkind(partrelid)!=RELKIND_PARTITIONED_TABLE)
1344-
{
1345-
*leaf_part_oids=lappend_oid(*leaf_part_oids,partrelid);
1346-
pd[i]->indexes[j]=k++;
1347-
}
1348-
else
1349-
{
1350-
/*
1351-
* offset denotes the number of partitioned tables of upper
1352-
* levels including those of the current level. Any partition
1353-
* of this table must belong to the next level and hence will
1354-
* be placed after the last partitioned table of this level.
1355-
*/
1356-
pd[i]->indexes[j]=-(1+offset+m);
1357-
m++;
1358-
}
1332+
pd->indexes[i]=-list_length(*pds);
1333+
get_partition_dispatch_recurse(partrel,rel,pds,leaf_part_oids);
13591334
}
1360-
i++;
1361-
1362-
/*
1363-
* This counts the number of partitioned tables at upper levels
1364-
* including those of the current level.
1365-
*/
1366-
offset+=m;
13671335
}
1368-
1369-
returnpd;
13701336
}
13711337

13721338
/* Module-local functions */

‎src/backend/optimizer/prep/prepunion.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,13 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
15651565
root->append_rel_list=list_concat(root->append_rel_list,appinfos);
15661566
}
15671567

1568+
/*
1569+
* expand_partitioned_rtentry
1570+
*Recursively expand an RTE for a partitioned table.
1571+
*
1572+
* Note that RelationGetPartitionDispatchInfo will expand partitions in the
1573+
* same order as this code.
1574+
*/
15681575
staticvoid
15691576
expand_partitioned_rtentry(PlannerInfo*root,RangeTblEntry*parentrte,
15701577
IndexparentRTindex,Relationparentrel,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp