Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1d9351a

Browse files
committed
Fix hash partition pruning with asymmetric partition sets.
perform_pruning_combine_step() was not taught about the number ofpartition indexes used in hash partitioning; more embarrassingly,get_matching_hash_bounds() also had it wrong. These errors are maskedin the common case where all the partitions have the same modulusand no partition is missing. However, with missing or unequal-sizepartitions, we could erroneously prune some partitions that needto be scanned, leading to silently wrong query answers.While a minimal-footprint fix for this could be to exportget_partition_bound_num_indexes and make the incorrect functions use it,I'm of the opinion that that function should never have existed in thefirst place. It's not reasonable data structure design thatPartitionBoundInfoData lacks any explicit record of the length ofits indexes[] array. Perhaps that was all right when it could alwaysbe assumed equal to ndatums, but something should have been done aboutit as soon as that stopped being true. Putting in an explicit"nindexes" field makes both partition_bounds_equal() andpartition_bounds_copy() simpler, safer, and faster than before,and removes explicit knowledge of the number-of-partition-indexesrules from some other places too.This change also makes get_hash_partition_greatest_modulus obsolete.I left that in place in case any external code uses it, but no corecode does anymore.Per bug #16840 from Michał Albrycht. Back-patch to v11 where thehash partitioning code came in. (In the back branches, add the newfield at the end of PartitionBoundInfoData to minimize ABI risks.)Discussion:https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
1 parent1b242f4 commit1d9351a

File tree

6 files changed

+124
-141
lines changed

6 files changed

+124
-141
lines changed

‎src/backend/executor/execPartition.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,16 +1323,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
13231323
{
13241324
casePARTITION_STRATEGY_HASH:
13251325
{
1326-
intgreatest_modulus;
13271326
uint64rowHash;
13281327

1329-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
13301328
rowHash=compute_partition_hash_value(key->partnatts,
13311329
key->partsupfunc,
13321330
key->partcollation,
13331331
values,isnull);
13341332

1335-
part_index=boundinfo->indexes[rowHash %greatest_modulus];
1333+
part_index=boundinfo->indexes[rowHash %boundinfo->nindexes];
13361334
}
13371335
break;
13381336

‎src/backend/partitioning/partbounds.c

Lines changed: 31 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,6 @@ static intpartition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224224
Oid*partcollation,
225225
PartitionBoundInfoboundinfo,
226226
PartitionRangeBound*probe,int32*cmpval);
227-
staticintget_partition_bound_num_indexes(PartitionBoundInfob);
228227
staticExpr*make_partition_op_expr(PartitionKeykey,intkeynum,
229228
uint16strategy,Expr*arg1,Expr*arg2);
230229
staticOidget_partition_operator(PartitionKeykey,intcol,
@@ -398,6 +397,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
398397

399398
boundinfo->ndatums=ndatums;
400399
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
400+
boundinfo->nindexes=greatest_modulus;
401401
boundinfo->indexes= (int*)palloc(greatest_modulus*sizeof(int));
402402
for (i=0;i<greatest_modulus;i++)
403403
boundinfo->indexes[i]=-1;
@@ -530,6 +530,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
530530

531531
boundinfo->ndatums=ndatums;
532532
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
533+
boundinfo->nindexes=ndatums;
533534
boundinfo->indexes= (int*)palloc(ndatums*sizeof(int));
534535

535536
/*
@@ -725,8 +726,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
725726

726727
/*
727728
* For range partitioning, an additional value of -1 is stored as the last
728-
* element.
729+
* element of the indexes[] array.
729730
*/
731+
boundinfo->nindexes=ndatums+1;
730732
boundinfo->indexes= (int*)palloc((ndatums+1)*sizeof(int));
731733

732734
for (i=0;i<ndatums;i++)
@@ -807,45 +809,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
807809
if (b1->ndatums!=b2->ndatums)
808810
return false;
809811

812+
if (b1->nindexes!=b2->nindexes)
813+
return false;
814+
810815
if (b1->null_index!=b2->null_index)
811816
return false;
812817

813818
if (b1->default_index!=b2->default_index)
814819
return false;
815820

816-
if (b1->strategy==PARTITION_STRATEGY_HASH)
821+
/* For all partition strategies, the indexes[] arrays have to match */
822+
for (i=0;i<b1->nindexes;i++)
817823
{
818-
intgreatest_modulus=get_hash_partition_greatest_modulus(b1);
819-
820-
/*
821-
* If two hash partitioned tables have different greatest moduli,
822-
* their partition schemes don't match.
823-
*/
824-
if (greatest_modulus!=get_hash_partition_greatest_modulus(b2))
824+
if (b1->indexes[i]!=b2->indexes[i])
825825
return false;
826+
}
826827

828+
/* Finally, compare the datums[] arrays */
829+
if (b1->strategy==PARTITION_STRATEGY_HASH)
830+
{
827831
/*
828832
* We arrange the partitions in the ascending order of their moduli
829833
* and remainders. Also every modulus is factor of next larger
830834
* modulus. Therefore we can safely store index of a given partition
831835
* in indexes array at remainder of that partition. Also entries at
832836
* (remainder + N * modulus) positions in indexes array are all same
833-
* for (modulus, remainder) specification for any partition. Thus
834-
* datums array from both the given bounds are same, if and only if
835-
* their indexes array will be same. So, it suffices to compare
836-
* indexes array.
837-
*/
838-
for (i=0;i<greatest_modulus;i++)
839-
if (b1->indexes[i]!=b2->indexes[i])
840-
return false;
841-
842-
#ifdefUSE_ASSERT_CHECKING
843-
844-
/*
845-
* Nonetheless make sure that the bounds are indeed same when the
837+
* for (modulus, remainder) specification for any partition. Thus the
838+
* datums arrays from the given bounds are the same, if and only if
839+
* their indexes arrays are the same. So, it suffices to compare the
840+
* indexes arrays.
841+
*
842+
* Nonetheless make sure that the bounds are indeed the same when the
846843
* indexes match. Hash partition bound stores modulus and remainder
847844
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
848845
*/
846+
#ifdefUSE_ASSERT_CHECKING
849847
for (i=0;i<b1->ndatums;i++)
850848
Assert((b1->datums[i][0]==b2->datums[i][0]&&
851849
b1->datums[i][1]==b2->datums[i][1]));
@@ -891,15 +889,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
891889
parttypbyval[j],parttyplen[j]))
892890
return false;
893891
}
894-
895-
if (b1->indexes[i]!=b2->indexes[i])
896-
return false;
897892
}
898-
899-
/* There are ndatums+1 indexes in case of range partitions */
900-
if (b1->strategy==PARTITION_STRATEGY_RANGE&&
901-
b1->indexes[i]!=b2->indexes[i])
902-
return false;
903893
}
904894
return true;
905895
}
@@ -920,19 +910,18 @@ partition_bounds_copy(PartitionBoundInfo src,
920910
PartitionBoundInfodest;
921911
inti;
922912
intndatums;
913+
intnindexes;
923914
intpartnatts;
924-
intnum_indexes;
925915
boolhash_part;
926916
intnatts;
927917

928918
dest= (PartitionBoundInfo)palloc(sizeof(PartitionBoundInfoData));
929919

930920
dest->strategy=src->strategy;
931921
ndatums=dest->ndatums=src->ndatums;
922+
nindexes=dest->nindexes=src->nindexes;
932923
partnatts=key->partnatts;
933924

934-
num_indexes=get_partition_bound_num_indexes(src);
935-
936925
/* List partitioned tables have only a single partition key. */
937926
Assert(key->strategy!=PARTITION_STRATEGY_LIST||partnatts==1);
938927

@@ -990,8 +979,8 @@ partition_bounds_copy(PartitionBoundInfo src,
990979
}
991980
}
992981

993-
dest->indexes= (int*)palloc(sizeof(int)*num_indexes);
994-
memcpy(dest->indexes,src->indexes,sizeof(int)*num_indexes);
982+
dest->indexes= (int*)palloc(sizeof(int)*nindexes);
983+
memcpy(dest->indexes,src->indexes,sizeof(int)*nindexes);
995984

996985
dest->null_index=src->null_index;
997986
dest->default_index=src->default_index;
@@ -2456,6 +2445,7 @@ build_merged_partition_bounds(char strategy, List *merged_datums,
24562445
}
24572446

24582447
Assert(list_length(merged_indexes)==ndatums);
2448+
merged_bounds->nindexes=ndatums;
24592449
merged_bounds->indexes= (int*)palloc(sizeof(int)*ndatums);
24602450
pos=0;
24612451
foreach(lc,merged_indexes)
@@ -2889,7 +2879,7 @@ check_new_partition_bound(char *relname, Relation parent,
28892879
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
28902880
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
28912881

2892-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
2882+
greatest_modulus=boundinfo->nindexes;
28932883
remainder=spec->remainder;
28942884

28952885
/*
@@ -3282,18 +3272,15 @@ check_default_partition_contents(Relation parent, Relation default_rel,
32823272
/*
32833273
* get_hash_partition_greatest_modulus
32843274
*
3285-
* Returns the greatest modulus of the hash partition bound. The greatest
3286-
*modulus will be at the end ofthedatums array because hash partitions are
3287-
*arrangedinthe ascending order of their moduli and remainders.
3275+
* Returns the greatest modulus of the hash partition bound.
3276+
*This is no longer used inthecore code, but we keep it around
3277+
* incase external modules are using it.
32883278
*/
32893279
int
32903280
get_hash_partition_greatest_modulus(PartitionBoundInfobound)
32913281
{
32923282
Assert(bound&&bound->strategy==PARTITION_STRATEGY_HASH);
3293-
Assert(bound->datums&&bound->ndatums>0);
3294-
Assert(DatumGetInt32(bound->datums[bound->ndatums-1][0])>0);
3295-
3296-
returnDatumGetInt32(bound->datums[bound->ndatums-1][0]);
3283+
returnbound->nindexes;
32973284
}
32983285

32993286
/*
@@ -3697,46 +3684,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
36973684
b1,b2);
36983685
}
36993686

3700-
/*
3701-
* get_partition_bound_num_indexes
3702-
*
3703-
* Returns the number of the entries in the partition bound indexes array.
3704-
*/
3705-
staticint
3706-
get_partition_bound_num_indexes(PartitionBoundInfobound)
3707-
{
3708-
intnum_indexes;
3709-
3710-
Assert(bound);
3711-
3712-
switch (bound->strategy)
3713-
{
3714-
casePARTITION_STRATEGY_HASH:
3715-
3716-
/*
3717-
* The number of the entries in the indexes array is same as the
3718-
* greatest modulus.
3719-
*/
3720-
num_indexes=get_hash_partition_greatest_modulus(bound);
3721-
break;
3722-
3723-
casePARTITION_STRATEGY_LIST:
3724-
num_indexes=bound->ndatums;
3725-
break;
3726-
3727-
casePARTITION_STRATEGY_RANGE:
3728-
/* Range partitioned table has an extra index. */
3729-
num_indexes=bound->ndatums+1;
3730-
break;
3731-
3732-
default:
3733-
elog(ERROR,"unexpected partition strategy: %d",
3734-
(int)bound->strategy);
3735-
}
3736-
3737-
returnnum_indexes;
3738-
}
3739-
37403687
/*
37413688
* get_partition_operator
37423689
*

‎src/backend/partitioning/partprune.c

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
781781
scan_default=final_result->scan_default;
782782
while ((i=bms_next_member(final_result->bound_offsets,i)) >=0)
783783
{
784-
intpartindex=context->boundinfo->indexes[i];
784+
intpartindex;
785+
786+
Assert(i<context->boundinfo->nindexes);
787+
partindex=context->boundinfo->indexes[i];
785788

786789
if (partindex<0)
787790
{
@@ -2514,20 +2517,19 @@ get_matching_hash_bounds(PartitionPruneContext *context,
25142517
for (i=0;i<partnatts;i++)
25152518
isnull[i]=bms_is_member(i,nullkeys);
25162519

2517-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
25182520
rowHash=compute_partition_hash_value(partnatts,partsupfunc,partcollation,
25192521
values,isnull);
25202522

2523+
greatest_modulus=boundinfo->nindexes;
25212524
if (partindices[rowHash %greatest_modulus] >=0)
25222525
result->bound_offsets=
25232526
bms_make_singleton(rowHash %greatest_modulus);
25242527
}
25252528
else
25262529
{
2527-
/* Getting here means at least one hash partition exists. */
2528-
Assert(boundinfo->ndatums>0);
2530+
/* Report all valid offsets into the boundinfo->indexes array. */
25292531
result->bound_offsets=bms_add_range(NULL,0,
2530-
boundinfo->ndatums-1);
2532+
boundinfo->nindexes-1);
25312533
}
25322534

25332535
/*
@@ -3388,30 +3390,20 @@ perform_pruning_combine_step(PartitionPruneContext *context,
33883390
PartitionPruneStepCombine*cstep,
33893391
PruneStepResult**step_results)
33903392
{
3391-
ListCell*lc1;
3392-
PruneStepResult*result=NULL;
3393+
PruneStepResult*result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
33933394
boolfirststep;
3395+
ListCell*lc1;
33943396

33953397
/*
33963398
* A combine step without any source steps is an indication to not perform
33973399
* any partition pruning. Return all datum indexes in that case.
33983400
*/
3399-
result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
3400-
if (list_length(cstep->source_stepids)==0)
3401+
if (cstep->source_stepids==NIL)
34013402
{
34023403
PartitionBoundInfoboundinfo=context->boundinfo;
3403-
intrangemax;
3404-
3405-
/*
3406-
* Add all valid offsets into the boundinfo->indexes array. For range
3407-
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
3408-
* valid entries; otherwise there are boundinfo->ndatums.
3409-
*/
3410-
rangemax=context->strategy==PARTITION_STRATEGY_RANGE ?
3411-
boundinfo->ndatums :boundinfo->ndatums-1;
34123404

34133405
result->bound_offsets=
3414-
bms_add_range(result->bound_offsets,0,rangemax);
3406+
bms_add_range(NULL,0,boundinfo->nindexes-1);
34153407
result->scan_default=partition_bound_has_default(boundinfo);
34163408
result->scan_null=partition_bound_accepts_nulls(boundinfo);
34173409
returnresult;

‎src/include/partitioning/partbounds.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ struct RelOptInfo;/* avoid including pathnodes.h here */
3030
* In the case of range partitioning, ndatums will typically be far less than
3131
* 2 * nparts, because a partition's upper bound and the next partition's lower
3232
* bound are the same in most common cases, and we only store one of them (the
33-
* upper bound). In case of hash partitioning, ndatums will be same as the
33+
* upper bound). In case of hash partitioning, ndatums will bethesame as the
3434
* number of partitions.
3535
*
3636
* For range and list partitioned tables, datums is an array of datum-tuples
@@ -46,24 +46,31 @@ struct RelOptInfo;/* avoid including pathnodes.h here */
4646
* the partition key's operator classes and collations.
4747
*
4848
* In the case of list partitioning, the indexes array stores one entry for
49-
* every datum, which is the index of the partition that accepts a given datum.
50-
* In case of range partitioning, it stores one entry per distinct range
51-
* datum, which is the index of the partition for which a given datum
52-
* is an upper bound. In the case of hash partitioning, the number of the
53-
* entries in the indexes array is same as the greatest modulus amongst all
54-
* partitions. For a given partition key datum-tuple, the index of the
55-
* partition which would accept that datum-tuple would be given by the entry
56-
* pointed by remainder produced when hash value of the datum-tuple is divided
57-
* by the greatest modulus.
49+
* each datum-array entry, which is the index of the partition that accepts
50+
* rows matching that datum. So nindexes == ndatums.
51+
*
52+
* In the case of range partitioning, the indexes array stores one entry per
53+
* distinct range datum, which is the index of the partition for which that
54+
* datum is an upper bound (or -1 for a "gap" that has no partition). It is
55+
* convenient to have an extra -1 entry representing values above the last
56+
* range datum, so nindexes == ndatums + 1.
57+
*
58+
* In the case of hash partitioning, the number of entries in the indexes
59+
* array is the same as the greatest modulus amongst all partitions (which
60+
* is a multiple of all partition moduli), so nindexes == greatest modulus.
61+
* The indexes array is indexed according to the hash key's remainder modulo
62+
* the greatest modulus, and it contains either the partition index accepting
63+
* that remainder, or -1 if there is no partition for that remainder.
5864
*/
5965
typedefstructPartitionBoundInfoData
6066
{
6167
charstrategy;/* hash, list or range? */
62-
intndatums;/* Length of the datums following array */
68+
intndatums;/* Length of the datums[] array */
6369
Datum**datums;
6470
PartitionRangeDatumKind**kind;/* The kind of each range bound datum;
6571
* NULL for hash and list partitioned
6672
* tables */
73+
intnindexes;/* Length of the indexes[] array */
6774
int*indexes;/* Partition indexes */
6875
intnull_index;/* Index of the null-accepting partition; -1
6976
* if there isn't one */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp