Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9878b64

Browse files
committed
HashAgg: use better cardinality estimate for recursive spilling.
Use HyperLogLog to estimate the group cardinality in a spilled partition. This estimate is used to choose the number of partitions if we recurse.

The previous behavior was to use the number of tuples in a spilled partition as the estimate for the number of groups, which led to overpartitioning. That could cause the number of batches to be much higher than expected (with each batch being very small), which made it harder to interpret EXPLAIN ANALYZE results.

Reviewed-by: Peter Geoghegan
Discussion: https://postgr.es/m/a856635f9284bc36f7a77d02f47bbb6aaf7b59b3.camel@j-davis.com
Backpatch-through: 13
1 parent f2130e7 · commit 9878b64

File tree

2 files changed

+43
-23
lines changed

2 files changed

+43
-23
lines changed

‎src/backend/executor/nodeAgg.c

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,11 @@
245245
#include"catalog/pg_aggregate.h"
246246
#include"catalog/pg_proc.h"
247247
#include"catalog/pg_type.h"
248+
#include"common/hashfn.h"
248249
#include"executor/execExpr.h"
249250
#include"executor/executor.h"
250251
#include"executor/nodeAgg.h"
252+
#include"lib/hyperloglog.h"
251253
#include"miscadmin.h"
252254
#include"nodes/makefuncs.h"
253255
#include"nodes/nodeFuncs.h"
@@ -295,6 +297,14 @@
295297
#defineHASHAGG_READ_BUFFER_SIZE BLCKSZ
296298
#defineHASHAGG_WRITE_BUFFER_SIZE BLCKSZ
297299

300+
/*
301+
* HyperLogLog is used for estimating the cardinality of the spilled tuples in
302+
* a given partition. 5 bits corresponds to a size of about 32 bytes and a
303+
* worst-case error of around 18%. That's effective enough to choose a
304+
* reasonable number of partitions when recursing.
305+
*/
306+
#defineHASHAGG_HLL_BIT_WIDTH 5
307+
298308
/*
299309
* Estimate chunk overhead as a constant 16 bytes. XXX: should this be
300310
* improved?
@@ -339,6 +349,7 @@ typedef struct HashAggSpill
339349
int64*ntuples;/* number of tuples in each partition */
340350
uint32mask;/* mask to find partition from hash value */
341351
intshift;/* after masking, shift by this amount */
352+
hyperLogLogState*hll_card;/* cardinality estimate for contents */
342353
}HashAggSpill;
343354

344355
/*
@@ -357,6 +368,7 @@ typedef struct HashAggBatch
357368
LogicalTapeSet*tapeset;/* borrowed reference to tape set */
358369
intinput_tapenum;/* input partition tape */
359370
int64input_tuples;/* number of tuples in this batch */
371+
doubleinput_card;/* estimated group cardinality */
360372
}HashAggBatch;
361373

362374
/* used to find referenced colnos */
@@ -411,7 +423,7 @@ static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
411423
staticlonghash_choose_num_buckets(doublehashentrysize,
412424
longestimated_nbuckets,
413425
Sizememory);
414-
staticinthash_choose_num_partitions(uint64input_groups,
426+
staticinthash_choose_num_partitions(doubleinput_groups,
415427
doublehashentrysize,
416428
intused_bits,
417429
int*log2_npartittions);
@@ -432,10 +444,11 @@ static void hashagg_finish_initial_spills(AggState *aggstate);
432444
staticvoidhashagg_reset_spill_state(AggState*aggstate);
433445
staticHashAggBatch*hashagg_batch_new(LogicalTapeSet*tapeset,
434446
intinput_tapenum,intsetno,
435-
int64input_tuples,intused_bits);
447+
int64input_tuples,doubleinput_card,
448+
intused_bits);
436449
staticMinimalTuplehashagg_batch_read(HashAggBatch*batch,uint32*hashp);
437450
staticvoidhashagg_spill_init(HashAggSpill*spill,HashTapeInfo*tapeinfo,
438-
intused_bits,uint64input_tuples,
451+
intused_bits,doubleinput_groups,
439452
doublehashentrysize);
440453
staticSizehashagg_spill_tuple(AggState*aggstate,HashAggSpill*spill,
441454
TupleTableSlot*slot,uint32hash);
@@ -1777,7 +1790,7 @@ hashagg_recompile_expressions(AggState *aggstate, bool minslot, bool nullcheck)
17771790
* substantially larger than the initial value.
17781791
*/
17791792
void
1780-
hash_agg_set_limits(doublehashentrysize,uint64input_groups,intused_bits,
1793+
hash_agg_set_limits(doublehashentrysize,doubleinput_groups,intused_bits,
17811794
Size*mem_limit,uint64*ngroups_limit,
17821795
int*num_partitions)
17831796
{
@@ -1969,7 +1982,7 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
19691982
* *log2_npartitions to the log2() of the number of partitions.
19701983
*/
19711984
staticint
1972-
hash_choose_num_partitions(uint64input_groups,doublehashentrysize,
1985+
hash_choose_num_partitions(doubleinput_groups,doublehashentrysize,
19731986
intused_bits,int*log2_npartitions)
19741987
{
19751988
Sizemem_wanted;
@@ -2574,7 +2587,6 @@ agg_refill_hash_table(AggState *aggstate)
25742587
AggStatePerHashperhash;
25752588
HashAggSpillspill;
25762589
HashTapeInfo*tapeinfo=aggstate->hash_tapeinfo;
2577-
uint64ngroups_estimate;
25782590
boolspill_initialized= false;
25792591

25802592
if (aggstate->hash_batches==NIL)
@@ -2583,16 +2595,7 @@ agg_refill_hash_table(AggState *aggstate)
25832595
batch=linitial(aggstate->hash_batches);
25842596
aggstate->hash_batches=list_delete_first(aggstate->hash_batches);
25852597

2586-
/*
2587-
* Estimate the number of groups for this batch as the total number of
2588-
* tuples in its input file. Although that's a worst case, it's not bad
2589-
* here for two reasons: (1) overestimating is better than
2590-
* underestimating; and (2) we've already scanned the relation once, so
2591-
* it's likely that we've already finalized many of the common values.
2592-
*/
2593-
ngroups_estimate=batch->input_tuples;
2594-
2595-
hash_agg_set_limits(aggstate->hashentrysize,ngroups_estimate,
2598+
hash_agg_set_limits(aggstate->hashentrysize,batch->input_card,
25962599
batch->used_bits,&aggstate->hash_mem_limit,
25972600
&aggstate->hash_ngroups_limit,NULL);
25982601

@@ -2678,7 +2681,7 @@ agg_refill_hash_table(AggState *aggstate)
26782681
*/
26792682
spill_initialized= true;
26802683
hashagg_spill_init(&spill,tapeinfo,batch->used_bits,
2681-
ngroups_estimate,aggstate->hashentrysize);
2684+
batch->input_card,aggstate->hashentrysize);
26822685
}
26832686
/* no memory for a new group, spill */
26842687
hashagg_spill_tuple(aggstate,&spill,spillslot,hash);
@@ -2936,7 +2939,7 @@ hashagg_tapeinfo_release(HashTapeInfo *tapeinfo, int tapenum)
29362939
*/
29372940
staticvoid
29382941
hashagg_spill_init(HashAggSpill*spill,HashTapeInfo*tapeinfo,intused_bits,
2939-
uint64input_groups,doublehashentrysize)
2942+
doubleinput_groups,doublehashentrysize)
29402943
{
29412944
intnpartitions;
29422945
intpartition_bits;
@@ -2946,13 +2949,17 @@ hashagg_spill_init(HashAggSpill *spill, HashTapeInfo *tapeinfo, int used_bits,
29462949

29472950
spill->partitions=palloc0(sizeof(int)*npartitions);
29482951
spill->ntuples=palloc0(sizeof(int64)*npartitions);
2952+
spill->hll_card=palloc0(sizeof(hyperLogLogState)*npartitions);
29492953

29502954
hashagg_tapeinfo_assign(tapeinfo,spill->partitions,npartitions);
29512955

29522956
spill->tapeset=tapeinfo->tapeset;
29532957
spill->shift=32-used_bits-partition_bits;
29542958
spill->mask= (npartitions-1) <<spill->shift;
29552959
spill->npartitions=npartitions;
2960+
2961+
for (inti=0;i<npartitions;i++)
2962+
initHyperLogLog(&spill->hll_card[i],HASHAGG_HLL_BIT_WIDTH);
29562963
}
29572964

29582965
/*
@@ -3001,6 +3008,13 @@ hashagg_spill_tuple(AggState *aggstate, HashAggSpill *spill,
30013008
partition= (hash&spill->mask) >>spill->shift;
30023009
spill->ntuples[partition]++;
30033010

3011+
/*
3012+
* All hash values destined for a given partition have some bits in
3013+
* common, which causes bad HLL cardinality estimates. Hash the hash to
3014+
* get a more uniform distribution.
3015+
*/
3016+
addHyperLogLog(&spill->hll_card[partition],hash_bytes_uint32(hash));
3017+
30043018
tapenum=spill->partitions[partition];
30053019

30063020
LogicalTapeWrite(tapeset,tapenum, (void*)&hash,sizeof(uint32));
@@ -3023,7 +3037,7 @@ hashagg_spill_tuple(AggState *aggstate, HashAggSpill *spill,
30233037
*/
30243038
staticHashAggBatch*
30253039
hashagg_batch_new(LogicalTapeSet*tapeset,inttapenum,intsetno,
3026-
int64input_tuples,intused_bits)
3040+
int64input_tuples,doubleinput_card,intused_bits)
30273041
{
30283042
HashAggBatch*batch=palloc0(sizeof(HashAggBatch));
30293043

@@ -3032,6 +3046,7 @@ hashagg_batch_new(LogicalTapeSet *tapeset, int tapenum, int setno,
30323046
batch->tapeset=tapeset;
30333047
batch->input_tapenum=tapenum;
30343048
batch->input_tuples=input_tuples;
3049+
batch->input_card=input_card;
30353050

30363051
returnbatch;
30373052
}
@@ -3135,21 +3150,26 @@ hashagg_spill_finish(AggState *aggstate, HashAggSpill *spill, int setno)
31353150

31363151
for (i=0;i<spill->npartitions;i++)
31373152
{
3138-
inttapenum=spill->partitions[i];
3139-
HashAggBatch*new_batch;
3153+
inttapenum=spill->partitions[i];
3154+
HashAggBatch*new_batch;
3155+
doublecardinality;
31403156

31413157
/* if the partition is empty, don't create a new batch of work */
31423158
if (spill->ntuples[i]==0)
31433159
continue;
31443160

3161+
cardinality=estimateHyperLogLog(&spill->hll_card[i]);
3162+
freeHyperLogLog(&spill->hll_card[i]);
3163+
31453164
new_batch=hashagg_batch_new(aggstate->hash_tapeinfo->tapeset,
31463165
tapenum,setno,spill->ntuples[i],
3147-
used_bits);
3166+
cardinality,used_bits);
31483167
aggstate->hash_batches=lcons(new_batch,aggstate->hash_batches);
31493168
aggstate->hash_batches_used++;
31503169
}
31513170

31523171
pfree(spill->ntuples);
3172+
pfree(spill->hll_card);
31533173
pfree(spill->partitions);
31543174
}
31553175

‎src/include/executor/nodeAgg.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ extern void ExecReScanAgg(AggState *node);
320320

321321
externSizehash_agg_entry_size(intnumTrans,SizetupleWidth,
322322
SizetransitionSpace);
323-
externvoidhash_agg_set_limits(doublehashentrysize,uint64input_groups,
323+
externvoidhash_agg_set_limits(doublehashentrysize,doubleinput_groups,
324324
intused_bits,Size*mem_limit,
325325
uint64*ngroups_limit,int*num_partitions);
326326

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp