Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9fdb675

Browse files
committed
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a moresophisticated algorithm for partition pruning. The new module uses eachpartition's "boundinfo" for pruning instead of constraint exclusion,based on an idea proposed by Robert Haas of a "pruning program": a listof steps generated from the query quals which are run iteratively toobtain a list of partitions that must be scanned in order to satisfythose quals.At present, this targets planner-time partition pruning, but there existfurther patches to apply partition pruning at execution time as well.This commit also moves some definitions from include/catalog/partition.hto a new file include/partitioning/partbounds.h, in an attempt torationalize partitioning related code.Authors: Amit Langote, David Rowley, Dilip KumarReviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.Discussion:https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
1 parent11523e8 commit9fdb675

File tree

27 files changed

+3994
-416
lines changed

27 files changed

+3994
-416
lines changed

‎src/backend/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ top_builddir = ../..
1818
include$(top_builddir)/src/Makefile.global
1919

2020
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq\
21-
main nodes optimizer port postmaster regex replication rewrite\
21+
main nodes optimizer partitioning port postmaster\
22+
regex replication rewrite\
2223
statistics storage tcop tsearch utils$(top_builddir)/src/timezone\
2324
jit
2425

‎src/backend/catalog/partition.c

Lines changed: 16 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include"optimizer/prep.h"
4242
#include"optimizer/var.h"
4343
#include"parser/parse_coerce.h"
44+
#include"partitioning/partbounds.h"
4445
#include"rewrite/rewriteManip.h"
4546
#include"storage/lmgr.h"
4647
#include"utils/array.h"
@@ -55,89 +56,6 @@
5556
#include"utils/ruleutils.h"
5657
#include"utils/syscache.h"
5758

58-
/*
59-
* Information about bounds of a partitioned relation
60-
*
61-
* A list partition datum that is known to be NULL is never put into the
62-
* datums array. Instead, it is tracked using the null_index field.
63-
*
64-
* In the case of range partitioning, ndatums will typically be far less than
65-
* 2 * nparts, because a partition's upper bound and the next partition's lower
66-
* bound are the same in most common cases, and we only store one of them (the
67-
* upper bound). In case of hash partitioning, ndatums will be same as the
68-
* number of partitions.
69-
*
70-
* For range and list partitioned tables, datums is an array of datum-tuples
71-
* with key->partnatts datums each. For hash partitioned tables, it is an array
72-
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
73-
* given partition.
74-
*
75-
* The datums in datums array are arranged in increasing order as defined by
76-
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
77-
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
78-
* respectively. For range and list partitions this simply means that the
79-
* datums in the datums array are arranged in increasing order as defined by
80-
* the partition key's operator classes and collations.
81-
*
82-
* In the case of list partitioning, the indexes array stores one entry for
83-
* every datum, which is the index of the partition that accepts a given datum.
84-
* In case of range partitioning, it stores one entry per distinct range
85-
* datum, which is the index of the partition for which a given datum
86-
* is an upper bound. In the case of hash partitioning, the number of the
87-
* entries in the indexes array is same as the greatest modulus amongst all
88-
* partitions. For a given partition key datum-tuple, the index of the
89-
* partition which would accept that datum-tuple would be given by the entry
90-
* pointed by remainder produced when hash value of the datum-tuple is divided
91-
* by the greatest modulus.
92-
*/
93-
94-
typedefstructPartitionBoundInfoData
95-
{
96-
charstrategy;/* hash, list or range? */
97-
intndatums;/* Length of the datums following array */
98-
Datum**datums;
99-
PartitionRangeDatumKind**kind;/* The kind of each range bound datum;
100-
* NULL for hash and list partitioned
101-
* tables */
102-
int*indexes;/* Partition indexes */
103-
intnull_index;/* Index of the null-accepting partition; -1
104-
* if there isn't one */
105-
intdefault_index;/* Index of the default partition; -1 if there
106-
* isn't one */
107-
}PartitionBoundInfoData;
108-
109-
#definepartition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
110-
#definepartition_bound_has_default(bi) ((bi)->default_index != -1)
111-
112-
/*
113-
* When qsort'ing partition bounds after reading from the catalog, each bound
114-
* is represented with one of the following structs.
115-
*/
116-
117-
/* One bound of a hash partition */
118-
typedefstructPartitionHashBound
119-
{
120-
intmodulus;
121-
intremainder;
122-
intindex;
123-
}PartitionHashBound;
124-
125-
/* One value coming from some (index'th) list partition */
126-
typedefstructPartitionListValue
127-
{
128-
intindex;
129-
Datumvalue;
130-
}PartitionListValue;
131-
132-
/* One bound of a range partition */
133-
typedefstructPartitionRangeBound
134-
{
135-
intindex;
136-
Datum*datums;/* range bound datums */
137-
PartitionRangeDatumKind*kind;/* the kind of each datum */
138-
boollower;/* this is the lower (vs upper) bound */
139-
}PartitionRangeBound;
140-
14159

14260
staticOidget_partition_parent_worker(RelationinhRel,Oidrelid);
14361
staticvoidget_partition_ancestors_worker(RelationinhRel,Oidrelid,
@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
17391
Oid*partcollation,Datum*datums1,
17492
PartitionRangeDatumKind*kind1,boollower1,
17593
PartitionRangeBound*b2);
176-
staticint32partition_rbound_datum_cmp(FmgrInfo*partsupfunc,
177-
Oid*partcollation,
178-
Datum*rb_datums,PartitionRangeDatumKind*rb_kind,
179-
Datum*tuple_datums,intn_tuple_datums);
180-
181-
staticintpartition_list_bsearch(FmgrInfo*partsupfunc,Oid*partcollation,
182-
PartitionBoundInfoboundinfo,
183-
Datumvalue,bool*is_equal);
184-
staticintpartition_range_bsearch(intpartnatts,FmgrInfo*partsupfunc,
185-
Oid*partcollation,
186-
PartitionBoundInfoboundinfo,
187-
PartitionRangeBound*probe,bool*is_equal);
188-
staticintpartition_range_datum_bsearch(FmgrInfo*partsupfunc,
189-
Oid*partcollation,
190-
PartitionBoundInfoboundinfo,
191-
intnvalues,Datum*values,bool*is_equal);
192-
staticintpartition_hash_bsearch(PartitionBoundInfoboundinfo,
193-
intmodulus,intremainder);
19494

19595
staticintget_partition_bound_num_indexes(PartitionBoundInfob);
196-
staticintget_greatest_modulus(PartitionBoundInfob);
197-
staticuint64compute_hash_value(intpartnatts,FmgrInfo*partsupfunc,
198-
Datum*values,bool*isnull);
96+
19997

20098
/*
20199
* RelationBuildPartitionDesc
@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
765663

766664
if (b1->strategy==PARTITION_STRATEGY_HASH)
767665
{
768-
intgreatest_modulus=get_greatest_modulus(b1);
666+
intgreatest_modulus=get_hash_partition_greatest_modulus(b1);
769667

770668
/*
771669
* If two hash partitioned tables have different greatest moduli,
772670
* their partition schemes don't match.
773671
*/
774-
if (greatest_modulus!=get_greatest_modulus(b2))
672+
if (greatest_modulus!=get_hash_partition_greatest_modulus(b2))
775673
return false;
776674

777675
/*
@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent,
1029927
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1030928
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
1031929

1032-
greatest_modulus=get_greatest_modulus(boundinfo);
930+
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
1033931
remainder=spec->remainder;
1034932

1035933
/*
@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid)
16201518
returnresult;
16211519
}
16221520

1623-
/* Module-local functions */
16241521

16251522
/*
16261523
* get_partition_operator
@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
26372534
casePARTITION_STRATEGY_HASH:
26382535
{
26392536
PartitionBoundInfoboundinfo=partdesc->boundinfo;
2640-
intgreatest_modulus=get_greatest_modulus(boundinfo);
2537+
intgreatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
26412538
uint64rowHash=compute_hash_value(key->partnatts,
26422539
key->partsupfunc,
26432540
values,isnull);
@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
29712868
* of attributes resp.
29722869
*
29732870
*/
2974-
staticint32
2871+
int32
29752872
partition_rbound_datum_cmp(FmgrInfo*partsupfunc,Oid*partcollation,
29762873
Datum*rb_datums,PartitionRangeDatumKind*rb_kind,
29772874
Datum*tuple_datums,intn_tuple_datums)
@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
30052902
* *is_equal is set to true if the bound datum at the returned index is equal
30062903
* to the input value.
30072904
*/
3008-
staticint
2905+
int
30092906
partition_list_bsearch(FmgrInfo*partsupfunc,Oid*partcollation,
30102907
PartitionBoundInfoboundinfo,
30112908
Datumvalue,bool*is_equal)
@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
30482945
* *is_equal is set to true if the range bound at the returned index is equal
30492946
* to the input range bound
30502947
*/
3051-
staticint
2948+
int
30522949
partition_range_bsearch(intpartnatts,FmgrInfo*partsupfunc,
30532950
Oid*partcollation,
30542951
PartitionBoundInfoboundinfo,
@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
30932990
* *is_equal is set to true if the range bound at the returned index is equal
30942991
* to the input tuple.
30952992
*/
3096-
staticint
2993+
int
30972994
partition_range_datum_bsearch(FmgrInfo*partsupfunc,Oid*partcollation,
30982995
PartitionBoundInfoboundinfo,
30992996
intnvalues,Datum*values,bool*is_equal)
@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
31363033
*less than or equal to the given (modulus, remainder) pair or -1 if
31373034
*all of them are greater
31383035
*/
3139-
staticint
3036+
int
31403037
partition_hash_bsearch(PartitionBoundInfoboundinfo,
31413038
intmodulus,intremainder)
31423039
{
@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
32943191
* The number of the entries in the indexes array is same as the
32953192
* greatest modulus.
32963193
*/
3297-
num_indexes=get_greatest_modulus(bound);
3194+
num_indexes=get_hash_partition_greatest_modulus(bound);
32983195
break;
32993196

33003197
casePARTITION_STRATEGY_LIST:
@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
33153212
}
33163213

33173214
/*
3318-
*get_greatest_modulus
3215+
*get_hash_partition_greatest_modulus
33193216
*
33203217
* Returns the greatest modulus of the hash partition bound. The greatest
33213218
* modulus will be at the end of the datums array because hash partitions are
33223219
* arranged in the ascending order of their modulus and remainders.
33233220
*/
3324-
staticint
3325-
get_greatest_modulus(PartitionBoundInfobound)
3221+
int
3222+
get_hash_partition_greatest_modulus(PartitionBoundInfobound)
33263223
{
33273224
Assert(bound&&bound->strategy==PARTITION_STRATEGY_HASH);
33283225
Assert(bound->datums&&bound->ndatums>0);
@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound)
33363233
*
33373234
* Compute the hash value for given not null partition key values.
33383235
*/
3339-
staticuint64
3236+
uint64
33403237
compute_hash_value(intpartnatts,FmgrInfo*partsupfunc,
33413238
Datum*values,bool*isnull)
33423239
{

‎src/backend/nodes/copyfuncs.c

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from)
21502150
returnnewnode;
21512151
}
21522152

2153+
/*
2154+
* _copyPartitionPruneStepOp
2155+
*/
2156+
staticPartitionPruneStepOp*
2157+
_copyPartitionPruneStepOp(constPartitionPruneStepOp*from)
2158+
{
2159+
PartitionPruneStepOp*newnode=makeNode(PartitionPruneStepOp);
2160+
2161+
COPY_SCALAR_FIELD(step.step_id);
2162+
COPY_SCALAR_FIELD(opstrategy);
2163+
COPY_NODE_FIELD(exprs);
2164+
COPY_NODE_FIELD(cmpfns);
2165+
COPY_BITMAPSET_FIELD(nullkeys);
2166+
2167+
returnnewnode;
2168+
}
2169+
2170+
/*
2171+
* _copyPartitionPruneStepCombine
2172+
*/
2173+
staticPartitionPruneStepCombine*
2174+
_copyPartitionPruneStepCombine(constPartitionPruneStepCombine*from)
2175+
{
2176+
PartitionPruneStepCombine*newnode=makeNode(PartitionPruneStepCombine);
2177+
2178+
COPY_SCALAR_FIELD(step.step_id);
2179+
COPY_SCALAR_FIELD(combineOp);
2180+
COPY_NODE_FIELD(source_stepids);
2181+
2182+
returnnewnode;
2183+
}
2184+
21532185
/* ****************************************************************
21542186
*relation.h copy functions
21552187
*
@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from)
22772309
returnnewnode;
22782310
}
22792311

2280-
/*
2281-
* _copyPartitionedChildRelInfo
2282-
*/
2283-
staticPartitionedChildRelInfo*
2284-
_copyPartitionedChildRelInfo(constPartitionedChildRelInfo*from)
2285-
{
2286-
PartitionedChildRelInfo*newnode=makeNode(PartitionedChildRelInfo);
2287-
2288-
COPY_SCALAR_FIELD(parent_relid);
2289-
COPY_NODE_FIELD(child_rels);
2290-
COPY_SCALAR_FIELD(part_cols_updated);
2291-
2292-
returnnewnode;
2293-
}
2294-
22952312
/*
22962313
* _copyPlaceHolderInfo
22972314
*/
@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from)
50765093
caseT_MergeAction:
50775094
retval=_copyMergeAction(from);
50785095
break;
5096+
caseT_PartitionPruneStepOp:
5097+
retval=_copyPartitionPruneStepOp(from);
5098+
break;
5099+
caseT_PartitionPruneStepCombine:
5100+
retval=_copyPartitionPruneStepCombine(from);
5101+
break;
50795102

50805103
/*
50815104
* RELATION NODES
@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from)
50955118
caseT_AppendRelInfo:
50965119
retval=_copyAppendRelInfo(from);
50975120
break;
5098-
caseT_PartitionedChildRelInfo:
5099-
retval=_copyPartitionedChildRelInfo(from);
5100-
break;
51015121
caseT_PlaceHolderInfo:
51025122
retval=_copyPlaceHolderInfo(from);
51035123
break;

‎src/backend/nodes/equalfuncs.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
915915
return true;
916916
}
917917

918-
staticbool
919-
_equalPartitionedChildRelInfo(constPartitionedChildRelInfo*a,constPartitionedChildRelInfo*b)
920-
{
921-
COMPARE_SCALAR_FIELD(parent_relid);
922-
COMPARE_NODE_FIELD(child_rels);
923-
COMPARE_SCALAR_FIELD(part_cols_updated);
924-
925-
return true;
926-
}
927-
928918
staticbool
929919
_equalPlaceHolderInfo(constPlaceHolderInfo*a,constPlaceHolderInfo*b)
930920
{
@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b)
32303220
caseT_AppendRelInfo:
32313221
retval=_equalAppendRelInfo(a,b);
32323222
break;
3233-
caseT_PartitionedChildRelInfo:
3234-
retval=_equalPartitionedChildRelInfo(a,b);
3235-
break;
32363223
caseT_PlaceHolderInfo:
32373224
retval=_equalPlaceHolderInfo(a,b);
32383225
break;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp