Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7f1921c

Browse files
committed
Fix hash partition pruning with asymmetric partition sets.
perform_pruning_combine_step() was not taught about the number ofpartition indexes used in hash partitioning; more embarrassingly,get_matching_hash_bounds() also had it wrong. These errors are maskedin the common case where all the partitions have the same modulusand no partition is missing. However, with missing or unequal-sizepartitions, we could erroneously prune some partitions that needto be scanned, leading to silently wrong query answers.While a minimal-footprint fix for this could be to exportget_partition_bound_num_indexes and make the incorrect functions use it,I'm of the opinion that that function should never have existed in thefirst place. It's not reasonable data structure design thatPartitionBoundInfoData lacks any explicit record of the length ofits indexes[] array. Perhaps that was all right when it could alwaysbe assumed equal to ndatums, but something should have been done aboutit as soon as that stopped being true. Putting in an explicit"nindexes" field makes both partition_bounds_equal() andpartition_bounds_copy() simpler, safer, and faster than before,and removes explicit knowledge of the number-of-partition-indexesrules from some other places too.This change also makes get_hash_partition_greatest_modulus obsolete.I left that in place in case any external code uses it, but no corecode does anymore.Per bug #16840 from Michał Albrycht. Back-patch to v11 where thehash partitioning code came in. (In the back branches, add the newfield at the end of PartitionBoundInfoData to minimize ABI risks.)Discussion:https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
1 parent1449770 commit7f1921c

File tree

6 files changed

+124
-141
lines changed

6 files changed

+124
-141
lines changed

‎src/backend/executor/execPartition.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,16 +1312,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
13121312
{
13131313
casePARTITION_STRATEGY_HASH:
13141314
{
1315-
intgreatest_modulus;
13161315
uint64rowHash;
13171316

1318-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
13191317
rowHash=compute_partition_hash_value(key->partnatts,
13201318
key->partsupfunc,
13211319
key->partcollation,
13221320
values,isnull);
13231321

1324-
part_index=boundinfo->indexes[rowHash %greatest_modulus];
1322+
part_index=boundinfo->indexes[rowHash %boundinfo->nindexes];
13251323
}
13261324
break;
13271325

‎src/backend/partitioning/partbounds.c

Lines changed: 31 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,6 @@ static intpartition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224224
Oid*partcollation,
225225
PartitionBoundInfoboundinfo,
226226
PartitionRangeBound*probe,bool*is_equal);
227-
staticintget_partition_bound_num_indexes(PartitionBoundInfob);
228227
staticExpr*make_partition_op_expr(PartitionKeykey,intkeynum,
229228
uint16strategy,Expr*arg1,Expr*arg2);
230229
staticOidget_partition_operator(PartitionKeykey,intcol,
@@ -398,6 +397,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
398397

399398
boundinfo->ndatums=ndatums;
400399
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
400+
boundinfo->nindexes=greatest_modulus;
401401
boundinfo->indexes= (int*)palloc(greatest_modulus*sizeof(int));
402402
for (i=0;i<greatest_modulus;i++)
403403
boundinfo->indexes[i]=-1;
@@ -530,6 +530,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
530530

531531
boundinfo->ndatums=ndatums;
532532
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
533+
boundinfo->nindexes=ndatums;
533534
boundinfo->indexes= (int*)palloc(ndatums*sizeof(int));
534535

535536
/*
@@ -725,8 +726,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
725726

726727
/*
727728
* For range partitioning, an additional value of -1 is stored as the last
728-
* element.
729+
* element of the indexes[] array.
729730
*/
731+
boundinfo->nindexes=ndatums+1;
730732
boundinfo->indexes= (int*)palloc((ndatums+1)*sizeof(int));
731733

732734
for (i=0;i<ndatums;i++)
@@ -807,45 +809,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
807809
if (b1->ndatums!=b2->ndatums)
808810
return false;
809811

812+
if (b1->nindexes!=b2->nindexes)
813+
return false;
814+
810815
if (b1->null_index!=b2->null_index)
811816
return false;
812817

813818
if (b1->default_index!=b2->default_index)
814819
return false;
815820

816-
if (b1->strategy==PARTITION_STRATEGY_HASH)
821+
/* For all partition strategies, the indexes[] arrays have to match */
822+
for (i=0;i<b1->nindexes;i++)
817823
{
818-
intgreatest_modulus=get_hash_partition_greatest_modulus(b1);
819-
820-
/*
821-
* If two hash partitioned tables have different greatest moduli,
822-
* their partition schemes don't match.
823-
*/
824-
if (greatest_modulus!=get_hash_partition_greatest_modulus(b2))
824+
if (b1->indexes[i]!=b2->indexes[i])
825825
return false;
826+
}
826827

828+
/* Finally, compare the datums[] arrays */
829+
if (b1->strategy==PARTITION_STRATEGY_HASH)
830+
{
827831
/*
828832
* We arrange the partitions in the ascending order of their moduli
829833
* and remainders. Also every modulus is factor of next larger
830834
* modulus. Therefore we can safely store index of a given partition
831835
* in indexes array at remainder of that partition. Also entries at
832836
* (remainder + N * modulus) positions in indexes array are all same
833-
* for (modulus, remainder) specification for any partition. Thus
834-
* datums array from both the given bounds are same, if and only if
835-
* their indexes array will be same. So, it suffices to compare
836-
* indexes array.
837-
*/
838-
for (i=0;i<greatest_modulus;i++)
839-
if (b1->indexes[i]!=b2->indexes[i])
840-
return false;
841-
842-
#ifdefUSE_ASSERT_CHECKING
843-
844-
/*
845-
* Nonetheless make sure that the bounds are indeed same when the
837+
* for (modulus, remainder) specification for any partition. Thus the
838+
* datums arrays from the given bounds are the same, if and only if
839+
* their indexes arrays are the same. So, it suffices to compare the
840+
* indexes arrays.
841+
*
842+
* Nonetheless make sure that the bounds are indeed the same when the
846843
* indexes match. Hash partition bound stores modulus and remainder
847844
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
848845
*/
846+
#ifdefUSE_ASSERT_CHECKING
849847
for (i=0;i<b1->ndatums;i++)
850848
Assert((b1->datums[i][0]==b2->datums[i][0]&&
851849
b1->datums[i][1]==b2->datums[i][1]));
@@ -891,15 +889,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
891889
parttypbyval[j],parttyplen[j]))
892890
return false;
893891
}
894-
895-
if (b1->indexes[i]!=b2->indexes[i])
896-
return false;
897892
}
898-
899-
/* There are ndatums+1 indexes in case of range partitions */
900-
if (b1->strategy==PARTITION_STRATEGY_RANGE&&
901-
b1->indexes[i]!=b2->indexes[i])
902-
return false;
903893
}
904894
return true;
905895
}
@@ -920,19 +910,18 @@ partition_bounds_copy(PartitionBoundInfo src,
920910
PartitionBoundInfodest;
921911
inti;
922912
intndatums;
913+
intnindexes;
923914
intpartnatts;
924-
intnum_indexes;
925915
boolhash_part;
926916
intnatts;
927917

928918
dest= (PartitionBoundInfo)palloc(sizeof(PartitionBoundInfoData));
929919

930920
dest->strategy=src->strategy;
931921
ndatums=dest->ndatums=src->ndatums;
922+
nindexes=dest->nindexes=src->nindexes;
932923
partnatts=key->partnatts;
933924

934-
num_indexes=get_partition_bound_num_indexes(src);
935-
936925
/* List partitioned tables have only a single partition key. */
937926
Assert(key->strategy!=PARTITION_STRATEGY_LIST||partnatts==1);
938927

@@ -990,8 +979,8 @@ partition_bounds_copy(PartitionBoundInfo src,
990979
}
991980
}
992981

993-
dest->indexes= (int*)palloc(sizeof(int)*num_indexes);
994-
memcpy(dest->indexes,src->indexes,sizeof(int)*num_indexes);
982+
dest->indexes= (int*)palloc(sizeof(int)*nindexes);
983+
memcpy(dest->indexes,src->indexes,sizeof(int)*nindexes);
995984

996985
dest->null_index=src->null_index;
997986
dest->default_index=src->default_index;
@@ -2456,6 +2445,7 @@ build_merged_partition_bounds(char strategy, List *merged_datums,
24562445
}
24572446

24582447
Assert(list_length(merged_indexes)==ndatums);
2448+
merged_bounds->nindexes=ndatums;
24592449
merged_bounds->indexes= (int*)palloc(sizeof(int)*ndatums);
24602450
pos=0;
24612451
foreach(lc,merged_indexes)
@@ -2889,7 +2879,7 @@ check_new_partition_bound(char *relname, Relation parent,
28892879
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
28902880
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
28912881

2892-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
2882+
greatest_modulus=boundinfo->nindexes;
28932883
remainder=spec->remainder;
28942884

28952885
/*
@@ -3254,18 +3244,15 @@ check_default_partition_contents(Relation parent, Relation default_rel,
32543244
/*
32553245
* get_hash_partition_greatest_modulus
32563246
*
3257-
* Returns the greatest modulus of the hash partition bound. The greatest
3258-
*modulus will be at the end ofthedatums array because hash partitions are
3259-
*arrangedinthe ascending order of their moduli and remainders.
3247+
* Returns the greatest modulus of the hash partition bound.
3248+
*This is no longer used inthecore code, but we keep it around
3249+
* incase external modules are using it.
32603250
*/
32613251
int
32623252
get_hash_partition_greatest_modulus(PartitionBoundInfobound)
32633253
{
32643254
Assert(bound&&bound->strategy==PARTITION_STRATEGY_HASH);
3265-
Assert(bound->datums&&bound->ndatums>0);
3266-
Assert(DatumGetInt32(bound->datums[bound->ndatums-1][0])>0);
3267-
3268-
returnDatumGetInt32(bound->datums[bound->ndatums-1][0]);
3255+
returnbound->nindexes;
32693256
}
32703257

32713258
/*
@@ -3662,46 +3649,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
36623649
b1->lower,b2);
36633650
}
36643651

3665-
/*
3666-
* get_partition_bound_num_indexes
3667-
*
3668-
* Returns the number of the entries in the partition bound indexes array.
3669-
*/
3670-
staticint
3671-
get_partition_bound_num_indexes(PartitionBoundInfobound)
3672-
{
3673-
intnum_indexes;
3674-
3675-
Assert(bound);
3676-
3677-
switch (bound->strategy)
3678-
{
3679-
casePARTITION_STRATEGY_HASH:
3680-
3681-
/*
3682-
* The number of the entries in the indexes array is same as the
3683-
* greatest modulus.
3684-
*/
3685-
num_indexes=get_hash_partition_greatest_modulus(bound);
3686-
break;
3687-
3688-
casePARTITION_STRATEGY_LIST:
3689-
num_indexes=bound->ndatums;
3690-
break;
3691-
3692-
casePARTITION_STRATEGY_RANGE:
3693-
/* Range partitioned table has an extra index. */
3694-
num_indexes=bound->ndatums+1;
3695-
break;
3696-
3697-
default:
3698-
elog(ERROR,"unexpected partition strategy: %d",
3699-
(int)bound->strategy);
3700-
}
3701-
3702-
returnnum_indexes;
3703-
}
3704-
37053652
/*
37063653
* get_partition_operator
37073654
*

‎src/backend/partitioning/partprune.c

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
776776
scan_default=final_result->scan_default;
777777
while ((i=bms_next_member(final_result->bound_offsets,i)) >=0)
778778
{
779-
intpartindex=context->boundinfo->indexes[i];
779+
intpartindex;
780+
781+
Assert(i<context->boundinfo->nindexes);
782+
partindex=context->boundinfo->indexes[i];
780783

781784
if (partindex<0)
782785
{
@@ -2509,20 +2512,19 @@ get_matching_hash_bounds(PartitionPruneContext *context,
25092512
for (i=0;i<partnatts;i++)
25102513
isnull[i]=bms_is_member(i,nullkeys);
25112514

2512-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
25132515
rowHash=compute_partition_hash_value(partnatts,partsupfunc,partcollation,
25142516
values,isnull);
25152517

2518+
greatest_modulus=boundinfo->nindexes;
25162519
if (partindices[rowHash %greatest_modulus] >=0)
25172520
result->bound_offsets=
25182521
bms_make_singleton(rowHash %greatest_modulus);
25192522
}
25202523
else
25212524
{
2522-
/* Getting here means at least one hash partition exists. */
2523-
Assert(boundinfo->ndatums>0);
2525+
/* Report all valid offsets into the boundinfo->indexes array. */
25242526
result->bound_offsets=bms_add_range(NULL,0,
2525-
boundinfo->ndatums-1);
2527+
boundinfo->nindexes-1);
25262528
}
25272529

25282530
/*
@@ -3383,30 +3385,20 @@ perform_pruning_combine_step(PartitionPruneContext *context,
33833385
PartitionPruneStepCombine*cstep,
33843386
PruneStepResult**step_results)
33853387
{
3386-
ListCell*lc1;
3387-
PruneStepResult*result=NULL;
3388+
PruneStepResult*result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
33883389
boolfirststep;
3390+
ListCell*lc1;
33893391

33903392
/*
33913393
* A combine step without any source steps is an indication to not perform
33923394
* any partition pruning. Return all datum indexes in that case.
33933395
*/
3394-
result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
3395-
if (list_length(cstep->source_stepids)==0)
3396+
if (cstep->source_stepids==NIL)
33963397
{
33973398
PartitionBoundInfoboundinfo=context->boundinfo;
3398-
intrangemax;
3399-
3400-
/*
3401-
* Add all valid offsets into the boundinfo->indexes array. For range
3402-
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
3403-
* valid entries; otherwise there are boundinfo->ndatums.
3404-
*/
3405-
rangemax=context->strategy==PARTITION_STRATEGY_RANGE ?
3406-
boundinfo->ndatums :boundinfo->ndatums-1;
34073399

34083400
result->bound_offsets=
3409-
bms_add_range(result->bound_offsets,0,rangemax);
3401+
bms_add_range(NULL,0,boundinfo->nindexes-1);
34103402
result->scan_default=partition_bound_has_default(boundinfo);
34113403
result->scan_null=partition_bound_accepts_nulls(boundinfo);
34123404
returnresult;

‎src/include/partitioning/partbounds.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ struct RelOptInfo;/* avoid including pathnodes.h here */
3131
* In the case of range partitioning, ndatums will typically be far less than
3232
* 2 * nparts, because a partition's upper bound and the next partition's lower
3333
* bound are the same in most common cases, and we only store one of them (the
34-
* upper bound). In case of hash partitioning, ndatums will be same as the
34+
* upper bound). In case of hash partitioning, ndatums will bethesame as the
3535
* number of partitions.
3636
*
3737
* For range and list partitioned tables, datums is an array of datum-tuples
@@ -47,20 +47,26 @@ struct RelOptInfo;/* avoid including pathnodes.h here */
4747
* the partition key's operator classes and collations.
4848
*
4949
* In the case of list partitioning, the indexes array stores one entry for
50-
* every datum, which is the index of the partition that accepts a given datum.
51-
* In case of range partitioning, it stores one entry per distinct range
52-
* datum, which is the index of the partition for which a given datum
53-
* is an upper bound. In the case of hash partitioning, the number of the
54-
* entries in the indexes array is same as the greatest modulus amongst all
55-
* partitions. For a given partition key datum-tuple, the index of the
56-
* partition which would accept that datum-tuple would be given by the entry
57-
* pointed by remainder produced when hash value of the datum-tuple is divided
58-
* by the greatest modulus.
50+
* each datum-array entry, which is the index of the partition that accepts
51+
* rows matching that datum. So nindexes == ndatums.
52+
*
53+
* In the case of range partitioning, the indexes array stores one entry per
54+
* distinct range datum, which is the index of the partition for which that
55+
* datum is an upper bound (or -1 for a "gap" that has no partition). It is
56+
* convenient to have an extra -1 entry representing values above the last
57+
* range datum, so nindexes == ndatums + 1.
58+
*
59+
* In the case of hash partitioning, the number of entries in the indexes
60+
* array is the same as the greatest modulus amongst all partitions (which
61+
* is a multiple of all partition moduli), so nindexes == greatest modulus.
62+
* The indexes array is indexed according to the hash key's remainder modulo
63+
* the greatest modulus, and it contains either the partition index accepting
64+
* that remainder, or -1 if there is no partition for that remainder.
5965
*/
6066
typedefstructPartitionBoundInfoData
6167
{
6268
charstrategy;/* hash, list or range? */
63-
intndatums;/* Length of the datums following array */
69+
intndatums;/* Length of the datums[] array */
6470
Datum**datums;
6571
PartitionRangeDatumKind**kind;/* The kind of each range bound datum;
6672
* NULL for hash and list partitioned
@@ -70,6 +76,7 @@ typedef struct PartitionBoundInfoData
7076
* if there isn't one */
7177
intdefault_index;/* Index of the default partition; -1 if there
7278
* isn't one */
79+
intnindexes;/* Length of the indexes[] array */
7380
}PartitionBoundInfoData;
7481

7582
#definepartition_bound_accepts_nulls(bi) ((bi)->null_index != -1)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp