Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 01a819a

Browse files
committed
Make planner compute the number of hash buckets the same way that
nodeHash.c will compute it (by sharing code).
1 parent ccda1a6 · commit 01a819a

File tree

3 files changed

+161
-127
lines changed

3 files changed

+161
-127
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 119 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
10-
*$Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
10+
*$Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,14 +16,12 @@
1616
*ExecHash- generate an in-memory hash table of the relation
1717
*ExecInitHash- initialize node and subnodes
1818
*ExecEndHash- shutdown node and subnodes
19-
*
2019
*/
20+
#include"postgres.h"
2121

2222
#include<sys/types.h>
2323
#include<math.h>
2424

25-
#include"postgres.h"
26-
2725
#include"executor/execdebug.h"
2826
#include"executor/nodeHash.h"
2927
#include"executor/nodeHashjoin.h"
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
209207
*create a hashtable in shared memory for hashjoin.
210208
* ----------------------------------------------------------------
211209
*/
212-
#defineFUDGE_FAC2.0
213-
214210
HashJoinTable
215211
ExecHashTableCreate(Hash*node)
216212
{
217-
Plan*outerNode;
218-
doublentuples;
219-
inttupsize;
220-
doubleinner_rel_bytes;
221-
doublehash_table_bytes;
222-
intnbatch;
223213
HashJoinTablehashtable;
224-
intnbuckets;
214+
Plan*outerNode;
225215
inttotalbuckets;
226-
intbucketsize;
216+
intnbuckets;
217+
intnbatch;
227218
inti;
228219
MemoryContextoldcxt;
229220

230221
/*
231222
* Get information about the size of the relation to be hashed (it's
232223
* the "outer" subtree of this node, but the inner relation of the
233-
* hashjoin).
234-
*
235-
* Caution: this is only the planner's estimates, and so can't be trusted
236-
* too far. Apply a healthy fudge factor.
224+
* hashjoin). Compute the appropriate size of the hash table.
237225
*/
238226
outerNode=outerPlan(node);
239-
ntuples=outerNode->plan_rows;
240-
if (ntuples <=0.0)/* force a plausible size if no info */
241-
ntuples=1000.0;
242-
243-
/*
244-
* estimate tupsize based on footprint of tuple in hashtable... but
245-
* what about palloc overhead?
246-
*/
247-
tupsize=MAXALIGN(outerNode->plan_width)+
248-
MAXALIGN(sizeof(HashJoinTupleData));
249-
inner_rel_bytes=ntuples*tupsize*FUDGE_FAC;
250-
251-
/*
252-
* Target hashtable size is SortMem kilobytes, but not less than
253-
* sqrt(estimated inner rel size), so as to avoid horrible
254-
* performance.
255-
*/
256-
hash_table_bytes=sqrt(inner_rel_bytes);
257-
if (hash_table_bytes< (SortMem*1024L))
258-
hash_table_bytes=SortMem*1024L;
259-
260-
/*
261-
* Count the number of hash buckets we want for the whole relation,
262-
* for an average bucket load of NTUP_PER_BUCKET (per virtual
263-
* bucket!).
264-
*/
265-
totalbuckets= (int)ceil(ntuples*FUDGE_FAC /NTUP_PER_BUCKET);
266-
267-
/*
268-
* Count the number of buckets we think will actually fit in the
269-
* target memory size, at a loading of NTUP_PER_BUCKET (physical
270-
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
271-
* hashtable space reserved to allow for nonuniform distribution of
272-
* hash values. Perhaps this should be a different number from the
273-
* other uses of FUDGE_FAC, but since we have no real good way to pick
274-
* either one...
275-
*/
276-
bucketsize=NTUP_PER_BUCKET*tupsize;
277-
nbuckets= (int) (hash_table_bytes / (bucketsize*FUDGE_FAC));
278-
if (nbuckets <=0)
279-
nbuckets=1;
280227

281-
if (totalbuckets <=nbuckets)
282-
{
228+
ExecChooseHashTableSize(outerNode->plan_rows,outerNode->plan_width,
229+
&totalbuckets,&nbuckets,&nbatch);
283230

284-
/*
285-
* We have enough space, so no batching. In theory we could even
286-
* reduce nbuckets, but since that could lead to poor behavior if
287-
* estimated ntuples is much less than reality, it seems better to
288-
* make more buckets instead of fewer.
289-
*/
290-
totalbuckets=nbuckets;
291-
nbatch=0;
292-
}
293-
else
294-
{
295-
296-
/*
297-
* Need to batch; compute how many batches we want to use. Note
298-
* that nbatch doesn't have to have anything to do with the ratio
299-
* totalbuckets/nbuckets; in fact, it is the number of groups we
300-
* will use for the part of the data that doesn't fall into the
301-
* first nbuckets hash buckets.
302-
*/
303-
nbatch= (int)ceil((inner_rel_bytes-hash_table_bytes) /
304-
hash_table_bytes);
305-
if (nbatch <=0)
306-
nbatch=1;
307-
}
308-
309-
/*
310-
* Now, totalbuckets is the number of (virtual) hashbuckets for the
311-
* whole relation, and nbuckets is the number of physical hashbuckets
312-
* we will use in the first pass. Data falling into the first
313-
* nbuckets virtual hashbuckets gets handled in the first pass;
314-
* everything else gets divided into nbatch batches to be processed in
315-
* additional passes.
316-
*/
317231
#ifdefHJDEBUG
318232
printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
319233
nbatch,totalbuckets,nbuckets);
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
407321
returnhashtable;
408322
}
409323

324+
325+
/*
326+
* Compute appropriate size for hashtable given the estimated size of the
327+
* relation to be hashed (number of rows and average row width).
328+
*
329+
* Caution: the input is only the planner's estimates, and so can't be
330+
* trusted too far. Apply a healthy fudge factor.
331+
*
332+
* This is exported so that the planner's costsize.c can use it.
333+
*/
334+
335+
/* Target bucket loading (tuples per bucket) */
336+
#defineNTUP_PER_BUCKET10
337+
/* Fudge factor to allow for inaccuracy of input estimates */
338+
#defineFUDGE_FAC2.0
339+
340+
void
341+
ExecChooseHashTableSize(doublentuples,inttupwidth,
342+
int*virtualbuckets,
343+
int*physicalbuckets,
344+
int*numbatches)
345+
{
346+
inttupsize;
347+
doubleinner_rel_bytes;
348+
doublehash_table_bytes;
349+
intnbatch;
350+
intnbuckets;
351+
inttotalbuckets;
352+
intbucketsize;
353+
354+
/* Force a plausible relation size if no info */
355+
if (ntuples <=0.0)
356+
ntuples=1000.0;
357+
358+
/*
359+
* Estimate tupsize based on footprint of tuple in hashtable... but
360+
* what about palloc overhead?
361+
*/
362+
tupsize=MAXALIGN(tupwidth)+MAXALIGN(sizeof(HashJoinTupleData));
363+
inner_rel_bytes=ntuples*tupsize*FUDGE_FAC;
364+
365+
/*
366+
* Target hashtable size is SortMem kilobytes, but not less than
367+
* sqrt(estimated inner rel size), so as to avoid horrible
368+
* performance.
369+
*/
370+
hash_table_bytes=sqrt(inner_rel_bytes);
371+
if (hash_table_bytes< (SortMem*1024L))
372+
hash_table_bytes=SortMem*1024L;
373+
374+
/*
375+
* Count the number of hash buckets we want for the whole relation,
376+
* for an average bucket load of NTUP_PER_BUCKET (per virtual
377+
* bucket!).
378+
*/
379+
totalbuckets= (int)ceil(ntuples*FUDGE_FAC /NTUP_PER_BUCKET);
380+
381+
/*
382+
* Count the number of buckets we think will actually fit in the
383+
* target memory size, at a loading of NTUP_PER_BUCKET (physical
384+
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
385+
* hashtable space reserved to allow for nonuniform distribution of
386+
* hash values. Perhaps this should be a different number from the
387+
* other uses of FUDGE_FAC, but since we have no real good way to pick
388+
* either one...
389+
*/
390+
bucketsize=NTUP_PER_BUCKET*tupsize;
391+
nbuckets= (int) (hash_table_bytes / (bucketsize*FUDGE_FAC));
392+
if (nbuckets <=0)
393+
nbuckets=1;
394+
395+
if (totalbuckets <=nbuckets)
396+
{
397+
/*
398+
* We have enough space, so no batching. In theory we could even
399+
* reduce nbuckets, but since that could lead to poor behavior if
400+
* estimated ntuples is much less than reality, it seems better to
401+
* make more buckets instead of fewer.
402+
*/
403+
totalbuckets=nbuckets;
404+
nbatch=0;
405+
}
406+
else
407+
{
408+
/*
409+
* Need to batch; compute how many batches we want to use. Note
410+
* that nbatch doesn't have to have anything to do with the ratio
411+
* totalbuckets/nbuckets; in fact, it is the number of groups we
412+
* will use for the part of the data that doesn't fall into the
413+
* first nbuckets hash buckets.
414+
*/
415+
nbatch= (int)ceil((inner_rel_bytes-hash_table_bytes) /
416+
hash_table_bytes);
417+
if (nbatch <=0)
418+
nbatch=1;
419+
}
420+
421+
/*
422+
* Now, totalbuckets is the number of (virtual) hashbuckets for the
423+
* whole relation, and nbuckets is the number of physical hashbuckets
424+
* we will use in the first pass. Data falling into the first
425+
* nbuckets virtual hashbuckets gets handled in the first pass;
426+
* everything else gets divided into nbatch batches to be processed in
427+
* additional passes.
428+
*/
429+
*virtualbuckets=totalbuckets;
430+
*physicalbuckets=nbuckets;
431+
*numbatches=nbatch;
432+
}
433+
434+
410435
/* ----------------------------------------------------------------
411436
*ExecHashTableDestroy
412437
*

‎src/backend/optimizer/path/costsize.c

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
*
4444
* IDENTIFICATION
45-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
45+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.77 2001/06/11 00:17:08 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
791791
* smart enough to figure out how the restrict clauses might change the
792792
* distribution, so this will have to do for now.
793793
*
794-
*The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
795-
*number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
796-
*a bucketsize fractionofNTUP_PER_BUCKET / ntuples. But that goal will
797-
*be reached only if the data values are uniformly distributed among the
798-
*buckets, which requires(a) at leastntuples / NTUP_PER_BUCKET distinct
799-
*data values, and (b)a not-too-skewed data distribution. Otherwise the
800-
*buckets willbe nonuniformly occupied. If the other relation in the join
801-
*has asimilardistribution,the most-loaded buckets are exactly those
802-
* that will be probed most often. Therefore, the "average" bucket size for
803-
* costing purposes should really be taken as something close to the "worst
804-
* case" bucket size. We try to estimate this byfirst scaling up if there
805-
* are too few distinct data values, and then scaling up again by the
806-
* ratio of the most common value's frequency to the average frequency.
794+
*We can get the number of buckets the executor will use for the given
795+
*input relation. If the data were perfectly distributed, with the same
796+
*numberoftuples going into each available bucket, then the bucketsize
797+
*fraction would be 1/nbuckets. But this happy state of affairs will occur
798+
*only if(a)there areat leastnbuckets distinct data values, and (b)
799+
*we havea not-too-skewed data distribution. Otherwise the buckets will
800+
* be nonuniformly occupied. If the other relation in the join has a key
801+
*distributionsimilarto this one's, thenthe most-loaded buckets are
802+
*exactly thosethat will be probed most often. Therefore, the "average"
803+
*bucket size forcosting purposes should really be taken as something close
804+
*to the "worstcase" bucket size. We try to estimate this byadjusting the
805+
*fraction if thereare too few distinct data values, and then scaling up
806+
*by theratio of the most common value's frequency to the average frequency.
807807
*
808808
* If no statistics are available, use a default estimate of 0.1. This will
809809
* discourage use of a hash rather strongly if the inner relation is large,
@@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
815815
{
816816
Oidrelid;
817817
RelOptInfo*rel;
818+
intvirtualbuckets;
819+
intphysicalbuckets;
820+
intnumbatches;
818821
HeapTupletuple;
819822
Form_pg_statisticstats;
820823
doubleestfract,
821824
ndistinct,
822-
needdistinct,
823825
mcvfreq,
824826
avgfreq;
825827
float4*numbers;
@@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
841843
if (rel->tuples <=0.0||rel->rows <=0.0)
842844
return0.1;/* ensure we can divide below */
843845

846+
/* Get hash table size that executor would use for this relation */
847+
ExecChooseHashTableSize(rel->rows,rel->width,
848+
&virtualbuckets,
849+
&physicalbuckets,
850+
&numbatches);
851+
844852
tuple=SearchSysCache(STATRELATT,
845853
ObjectIdGetDatum(relid),
846854
Int16GetDatum(var->varattno),
@@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
857865
caseObjectIdAttributeNumber:
858866
caseSelfItemPointerAttributeNumber:
859867
/* these are unique, so buckets should be well-distributed */
860-
return (double)NTUP_PER_BUCKET /rel->rows;
868+
return1.0 /(double)virtualbuckets;
861869
caseTableOidAttributeNumber:
862870
/* hashing this is a terrible idea... */
863871
return1.0;
@@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
873881
if (ndistinct<0.0)
874882
ndistinct=-ndistinct*rel->tuples;
875883

884+
if (ndistinct <=0.0)/* ensure we can divide */
885+
{
886+
ReleaseSysCache(tuple);
887+
return0.1;
888+
}
889+
876890
/* Also compute avg freq of all distinct data values in raw relation */
877891
avgfreq= (1.0-stats->stanullfrac) /ndistinct;
878892

@@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
887901
ndistinct *=rel->rows /rel->tuples;
888902

889903
/*
890-
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
891-
* ie the number of rows after applying restriction clauses, because
892-
* that's what the fraction will eventually be multiplied by in
893-
* cost_heapjoin.
904+
* Initial estimate of bucketsize fraction is 1/nbuckets as long as
905+
* the number of buckets is less than the expected number of distinct
906+
* values; otherwise it is 1/ndistinct.
894907
*/
895-
estfract= (double)NTUP_PER_BUCKET /rel->rows;
896-
897-
/*
898-
* Adjust estimated bucketsize if too few distinct values (after
899-
* restriction clauses) to fill all the buckets.
900-
*/
901-
needdistinct=rel->rows / (double)NTUP_PER_BUCKET;
902-
if (ndistinct<needdistinct)
903-
estfract *=needdistinct /ndistinct;
908+
if (ndistinct> (double)virtualbuckets)
909+
estfract=1.0 / (double)virtualbuckets;
910+
else
911+
estfract=1.0 /ndistinct;
904912

905913
/*
906914
* Look up the frequency of the most common value, if available.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp