Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7c53a80

Browse files
committed
Fix hash partition pruning with asymmetric partition sets.
perform_pruning_combine_step() was not taught about the number ofpartition indexes used in hash partitioning; more embarrassingly,get_matching_hash_bounds() also had it wrong. These errors are maskedin the common case where all the partitions have the same modulusand no partition is missing. However, with missing or unequal-sizepartitions, we could erroneously prune some partitions that needto be scanned, leading to silently wrong query answers.While a minimal-footprint fix for this could be to exportget_partition_bound_num_indexes and make the incorrect functions use it,I'm of the opinion that that function should never have existed in thefirst place. It's not reasonable data structure design thatPartitionBoundInfoData lacks any explicit record of the length ofits indexes[] array. Perhaps that was all right when it could alwaysbe assumed equal to ndatums, but something should have been done aboutit as soon as that stopped being true. Putting in an explicit"nindexes" field makes both partition_bounds_equal() andpartition_bounds_copy() simpler, safer, and faster than before,and removes explicit knowledge of the number-of-partition-indexesrules from some other places too.This change also makes get_hash_partition_greatest_modulus obsolete.I left that in place in case any external code uses it, but no corecode does anymore.Per bug #16840 from Michał Albrycht. Back-patch to v11 where thehash partitioning code came in. (In the back branches, add the newfield at the end of PartitionBoundInfoData to minimize ABI risks.)Discussion:https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
1 parent25f9a72 commit7c53a80

File tree

6 files changed

+123
-141
lines changed

6 files changed

+123
-141
lines changed

‎src/backend/executor/execPartition.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,16 +1323,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
13231323
{
13241324
casePARTITION_STRATEGY_HASH:
13251325
{
1326-
intgreatest_modulus;
13271326
uint64rowHash;
13281327

1329-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
13301328
rowHash=compute_partition_hash_value(key->partnatts,
13311329
key->partsupfunc,
13321330
key->partcollation,
13331331
values,isnull);
13341332

1335-
part_index=boundinfo->indexes[rowHash %greatest_modulus];
1333+
part_index=boundinfo->indexes[rowHash %boundinfo->nindexes];
13361334
}
13371335
break;
13381336

‎src/backend/partitioning/partbounds.c

Lines changed: 30 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ static intpartition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
9292
Oid*partcollation,
9393
PartitionBoundInfoboundinfo,
9494
PartitionRangeBound*probe,bool*is_equal);
95-
staticintget_partition_bound_num_indexes(PartitionBoundInfob);
9695
staticExpr*make_partition_op_expr(PartitionKeykey,intkeynum,
9796
uint16strategy,Expr*arg1,Expr*arg2);
9897
staticOidget_partition_operator(PartitionKeykey,intcol,
@@ -266,6 +265,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
266265

267266
boundinfo->ndatums=ndatums;
268267
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
268+
boundinfo->nindexes=greatest_modulus;
269269
boundinfo->indexes= (int*)palloc(greatest_modulus*sizeof(int));
270270
for (i=0;i<greatest_modulus;i++)
271271
boundinfo->indexes[i]=-1;
@@ -398,6 +398,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
398398

399399
boundinfo->ndatums=ndatums;
400400
boundinfo->datums= (Datum**)palloc0(ndatums*sizeof(Datum*));
401+
boundinfo->nindexes=ndatums;
401402
boundinfo->indexes= (int*)palloc(ndatums*sizeof(int));
402403

403404
/*
@@ -593,8 +594,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
593594

594595
/*
595596
* For range partitioning, an additional value of -1 is stored as the last
596-
* element.
597+
* element of the indexes[] array.
597598
*/
599+
boundinfo->nindexes=ndatums+1;
598600
boundinfo->indexes= (int*)palloc((ndatums+1)*sizeof(int));
599601

600602
for (i=0;i<ndatums;i++)
@@ -675,45 +677,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
675677
if (b1->ndatums!=b2->ndatums)
676678
return false;
677679

680+
if (b1->nindexes!=b2->nindexes)
681+
return false;
682+
678683
if (b1->null_index!=b2->null_index)
679684
return false;
680685

681686
if (b1->default_index!=b2->default_index)
682687
return false;
683688

684-
if (b1->strategy==PARTITION_STRATEGY_HASH)
689+
/* For all partition strategies, the indexes[] arrays have to match */
690+
for (i=0;i<b1->nindexes;i++)
685691
{
686-
intgreatest_modulus=get_hash_partition_greatest_modulus(b1);
687-
688-
/*
689-
* If two hash partitioned tables have different greatest moduli,
690-
* their partition schemes don't match.
691-
*/
692-
if (greatest_modulus!=get_hash_partition_greatest_modulus(b2))
692+
if (b1->indexes[i]!=b2->indexes[i])
693693
return false;
694+
}
694695

696+
/* Finally, compare the datums[] arrays */
697+
if (b1->strategy==PARTITION_STRATEGY_HASH)
698+
{
695699
/*
696700
* We arrange the partitions in the ascending order of their moduli
697701
* and remainders. Also every modulus is factor of next larger
698702
* modulus. Therefore we can safely store index of a given partition
699703
* in indexes array at remainder of that partition. Also entries at
700704
* (remainder + N * modulus) positions in indexes array are all same
701-
* for (modulus, remainder) specification for any partition. Thus
702-
* datums array from both the given bounds are same, if and only if
703-
* their indexes array will be same. So, it suffices to compare
704-
* indexes array.
705-
*/
706-
for (i=0;i<greatest_modulus;i++)
707-
if (b1->indexes[i]!=b2->indexes[i])
708-
return false;
709-
710-
#ifdefUSE_ASSERT_CHECKING
711-
712-
/*
713-
* Nonetheless make sure that the bounds are indeed same when the
705+
* for (modulus, remainder) specification for any partition. Thus the
706+
* datums arrays from the given bounds are the same, if and only if
707+
* their indexes arrays are the same. So, it suffices to compare the
708+
* indexes arrays.
709+
*
710+
* Nonetheless make sure that the bounds are indeed the same when the
714711
* indexes match. Hash partition bound stores modulus and remainder
715712
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
716713
*/
714+
#ifdefUSE_ASSERT_CHECKING
717715
for (i=0;i<b1->ndatums;i++)
718716
Assert((b1->datums[i][0]==b2->datums[i][0]&&
719717
b1->datums[i][1]==b2->datums[i][1]));
@@ -759,15 +757,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
759757
parttypbyval[j],parttyplen[j]))
760758
return false;
761759
}
762-
763-
if (b1->indexes[i]!=b2->indexes[i])
764-
return false;
765760
}
766-
767-
/* There are ndatums+1 indexes in case of range partitions */
768-
if (b1->strategy==PARTITION_STRATEGY_RANGE&&
769-
b1->indexes[i]!=b2->indexes[i])
770-
return false;
771761
}
772762
return true;
773763
}
@@ -783,17 +773,16 @@ partition_bounds_copy(PartitionBoundInfo src,
783773
PartitionBoundInfodest;
784774
inti;
785775
intndatums;
776+
intnindexes;
786777
intpartnatts;
787-
intnum_indexes;
788778

789779
dest= (PartitionBoundInfo)palloc(sizeof(PartitionBoundInfoData));
790780

791781
dest->strategy=src->strategy;
792782
ndatums=dest->ndatums=src->ndatums;
783+
nindexes=dest->nindexes=src->nindexes;
793784
partnatts=key->partnatts;
794785

795-
num_indexes=get_partition_bound_num_indexes(src);
796-
797786
/* List partitioned tables have only a single partition key. */
798787
Assert(key->strategy!=PARTITION_STRATEGY_LIST||partnatts==1);
799788

@@ -851,8 +840,8 @@ partition_bounds_copy(PartitionBoundInfo src,
851840
}
852841
}
853842

854-
dest->indexes= (int*)palloc(sizeof(int)*num_indexes);
855-
memcpy(dest->indexes,src->indexes,sizeof(int)*num_indexes);
843+
dest->indexes= (int*)palloc(sizeof(int)*nindexes);
844+
memcpy(dest->indexes,src->indexes,sizeof(int)*nindexes);
856845

857846
dest->null_index=src->null_index;
858847
dest->default_index=src->default_index;
@@ -1016,7 +1005,7 @@ check_new_partition_bound(char *relname, Relation parent,
10161005
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
10171006
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
10181007

1019-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
1008+
greatest_modulus=boundinfo->nindexes;
10201009
remainder=spec->remainder;
10211010

10221011
/*
@@ -1380,18 +1369,15 @@ check_default_partition_contents(Relation parent, Relation default_rel,
13801369
/*
13811370
* get_hash_partition_greatest_modulus
13821371
*
1383-
* Returns the greatest modulus of the hash partition bound. The greatest
1384-
*modulus will be at the end ofthedatums array because hash partitions are
1385-
*arrangedinthe ascending order of their moduli and remainders.
1372+
* Returns the greatest modulus of the hash partition bound.
1373+
*This is no longer used inthecore code, but we keep it around
1374+
* incase external modules are using it.
13861375
*/
13871376
int
13881377
get_hash_partition_greatest_modulus(PartitionBoundInfobound)
13891378
{
13901379
Assert(bound&&bound->strategy==PARTITION_STRATEGY_HASH);
1391-
Assert(bound->datums&&bound->ndatums>0);
1392-
Assert(DatumGetInt32(bound->datums[bound->ndatums-1][0])>0);
1393-
1394-
returnDatumGetInt32(bound->datums[bound->ndatums-1][0]);
1380+
returnbound->nindexes;
13951381
}
13961382

13971383
/*
@@ -1788,46 +1774,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
17881774
b1->lower,b2);
17891775
}
17901776

1791-
/*
1792-
* get_partition_bound_num_indexes
1793-
*
1794-
* Returns the number of the entries in the partition bound indexes array.
1795-
*/
1796-
staticint
1797-
get_partition_bound_num_indexes(PartitionBoundInfobound)
1798-
{
1799-
intnum_indexes;
1800-
1801-
Assert(bound);
1802-
1803-
switch (bound->strategy)
1804-
{
1805-
casePARTITION_STRATEGY_HASH:
1806-
1807-
/*
1808-
* The number of the entries in the indexes array is same as the
1809-
* greatest modulus.
1810-
*/
1811-
num_indexes=get_hash_partition_greatest_modulus(bound);
1812-
break;
1813-
1814-
casePARTITION_STRATEGY_LIST:
1815-
num_indexes=bound->ndatums;
1816-
break;
1817-
1818-
casePARTITION_STRATEGY_RANGE:
1819-
/* Range partitioned table has an extra index. */
1820-
num_indexes=bound->ndatums+1;
1821-
break;
1822-
1823-
default:
1824-
elog(ERROR,"unexpected partition strategy: %d",
1825-
(int)bound->strategy);
1826-
}
1827-
1828-
returnnum_indexes;
1829-
}
1830-
18311777
/*
18321778
* get_partition_operator
18331779
*

‎src/backend/partitioning/partprune.c

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
789789
scan_default=final_result->scan_default;
790790
while ((i=bms_next_member(final_result->bound_offsets,i)) >=0)
791791
{
792-
intpartindex=context->boundinfo->indexes[i];
792+
intpartindex;
793+
794+
Assert(i<context->boundinfo->nindexes);
795+
partindex=context->boundinfo->indexes[i];
793796

794797
if (partindex<0)
795798
{
@@ -2518,20 +2521,19 @@ get_matching_hash_bounds(PartitionPruneContext *context,
25182521
for (i=0;i<partnatts;i++)
25192522
isnull[i]=bms_is_member(i,nullkeys);
25202523

2521-
greatest_modulus=get_hash_partition_greatest_modulus(boundinfo);
25222524
rowHash=compute_partition_hash_value(partnatts,partsupfunc,partcollation,
25232525
values,isnull);
25242526

2527+
greatest_modulus=boundinfo->nindexes;
25252528
if (partindices[rowHash %greatest_modulus] >=0)
25262529
result->bound_offsets=
25272530
bms_make_singleton(rowHash %greatest_modulus);
25282531
}
25292532
else
25302533
{
2531-
/* Getting here means at least one hash partition exists. */
2532-
Assert(boundinfo->ndatums>0);
2534+
/* Report all valid offsets into the boundinfo->indexes array. */
25332535
result->bound_offsets=bms_add_range(NULL,0,
2534-
boundinfo->ndatums-1);
2536+
boundinfo->nindexes-1);
25352537
}
25362538

25372539
/*
@@ -3392,30 +3394,20 @@ perform_pruning_combine_step(PartitionPruneContext *context,
33923394
PartitionPruneStepCombine*cstep,
33933395
PruneStepResult**step_results)
33943396
{
3395-
ListCell*lc1;
3396-
PruneStepResult*result=NULL;
3397+
PruneStepResult*result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
33973398
boolfirststep;
3399+
ListCell*lc1;
33983400

33993401
/*
34003402
* A combine step without any source steps is an indication to not perform
34013403
* any partition pruning. Return all datum indexes in that case.
34023404
*/
3403-
result= (PruneStepResult*)palloc0(sizeof(PruneStepResult));
3404-
if (list_length(cstep->source_stepids)==0)
3405+
if (cstep->source_stepids==NIL)
34053406
{
34063407
PartitionBoundInfoboundinfo=context->boundinfo;
3407-
intrangemax;
3408-
3409-
/*
3410-
* Add all valid offsets into the boundinfo->indexes array. For range
3411-
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
3412-
* valid entries; otherwise there are boundinfo->ndatums.
3413-
*/
3414-
rangemax=context->strategy==PARTITION_STRATEGY_RANGE ?
3415-
boundinfo->ndatums :boundinfo->ndatums-1;
34163408

34173409
result->bound_offsets=
3418-
bms_add_range(result->bound_offsets,0,rangemax);
3410+
bms_add_range(NULL,0,boundinfo->nindexes-1);
34193411
result->scan_default=partition_bound_has_default(boundinfo);
34203412
result->scan_null=partition_bound_accepts_nulls(boundinfo);
34213413
returnresult;

‎src/include/partitioning/partbounds.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
* In the case of range partitioning, ndatums will typically be far less than
3131
* 2 * nparts, because a partition's upper bound and the next partition's lower
3232
* bound are the same in most common cases, and we only store one of them (the
33-
* upper bound). In case of hash partitioning, ndatums will be same as the
33+
* upper bound). In case of hash partitioning, ndatums will bethesame as the
3434
* number of partitions.
3535
*
3636
* For range and list partitioned tables, datums is an array of datum-tuples
@@ -46,20 +46,26 @@
4646
* the partition key's operator classes and collations.
4747
*
4848
* In the case of list partitioning, the indexes array stores one entry for
49-
* every datum, which is the index of the partition that accepts a given datum.
50-
* In case of range partitioning, it stores one entry per distinct range
51-
* datum, which is the index of the partition for which a given datum
52-
* is an upper bound. In the case of hash partitioning, the number of the
53-
* entries in the indexes array is same as the greatest modulus amongst all
54-
* partitions. For a given partition key datum-tuple, the index of the
55-
* partition which would accept that datum-tuple would be given by the entry
56-
* pointed by remainder produced when hash value of the datum-tuple is divided
57-
* by the greatest modulus.
49+
* each datum-array entry, which is the index of the partition that accepts
50+
* rows matching that datum. So nindexes == ndatums.
51+
*
52+
* In the case of range partitioning, the indexes array stores one entry per
53+
* distinct range datum, which is the index of the partition for which that
54+
* datum is an upper bound (or -1 for a "gap" that has no partition). It is
55+
* convenient to have an extra -1 entry representing values above the last
56+
* range datum, so nindexes == ndatums + 1.
57+
*
58+
* In the case of hash partitioning, the number of entries in the indexes
59+
* array is the same as the greatest modulus amongst all partitions (which
60+
* is a multiple of all partition moduli), so nindexes == greatest modulus.
61+
* The indexes array is indexed according to the hash key's remainder modulo
62+
* the greatest modulus, and it contains either the partition index accepting
63+
* that remainder, or -1 if there is no partition for that remainder.
5864
*/
5965
typedefstructPartitionBoundInfoData
6066
{
6167
charstrategy;/* hash, list or range? */
62-
intndatums;/* Length of the datums following array */
68+
intndatums;/* Length of the datums[] array */
6369
Datum**datums;
6470
PartitionRangeDatumKind**kind;/* The kind of each range bound datum;
6571
* NULL for hash and list partitioned
@@ -69,6 +75,7 @@ typedef struct PartitionBoundInfoData
6975
* if there isn't one */
7076
intdefault_index;/* Index of the default partition; -1 if there
7177
* isn't one */
78+
intnindexes;/* Length of the indexes[] array */
7279
}PartitionBoundInfoData;
7380

7481
#definepartition_bound_accepts_nulls(bi) ((bi)->null_index != -1)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp