Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 01a819a

Browse files
committed
Make planner compute the number of hash buckets the same way that
nodeHash.c will compute it (by sharing code).
1 parent ccda1a6 · commit 01a819a

File tree

3 files changed

+161
-127
lines changed

3 files changed

+161
-127
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 119 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
10-
*$Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
10+
*$Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,14 +16,12 @@
1616
*ExecHash- generate an in-memory hash table of the relation
1717
*ExecInitHash- initialize node and subnodes
1818
*ExecEndHash- shutdown node and subnodes
19-
*
2019
*/
20+
#include"postgres.h"
2121

2222
#include<sys/types.h>
2323
#include<math.h>
2424

25-
#include"postgres.h"
26-
2725
#include"executor/execdebug.h"
2826
#include"executor/nodeHash.h"
2927
#include"executor/nodeHashjoin.h"
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
209207
*create a hashtable in shared memory for hashjoin.
210208
* ----------------------------------------------------------------
211209
*/
212-
#defineFUDGE_FAC2.0
213-
214210
HashJoinTable
215211
ExecHashTableCreate(Hash*node)
216212
{
217-
Plan*outerNode;
218-
doublentuples;
219-
inttupsize;
220-
doubleinner_rel_bytes;
221-
doublehash_table_bytes;
222-
intnbatch;
223213
HashJoinTablehashtable;
224-
intnbuckets;
214+
Plan*outerNode;
225215
inttotalbuckets;
226-
intbucketsize;
216+
intnbuckets;
217+
intnbatch;
227218
inti;
228219
MemoryContextoldcxt;
229220

230221
/*
231222
* Get information about the size of the relation to be hashed (it's
232223
* the "outer" subtree of this node, but the inner relation of the
233-
* hashjoin).
234-
*
235-
* Caution: this is only the planner's estimates, and so can't be trusted
236-
* too far. Apply a healthy fudge factor.
224+
* hashjoin). Compute the appropriate size of the hash table.
237225
*/
238226
outerNode=outerPlan(node);
239-
ntuples=outerNode->plan_rows;
240-
if (ntuples <=0.0)/* force a plausible size if no info */
241-
ntuples=1000.0;
242-
243-
/*
244-
* estimate tupsize based on footprint of tuple in hashtable... but
245-
* what about palloc overhead?
246-
*/
247-
tupsize=MAXALIGN(outerNode->plan_width)+
248-
MAXALIGN(sizeof(HashJoinTupleData));
249-
inner_rel_bytes=ntuples*tupsize*FUDGE_FAC;
250-
251-
/*
252-
* Target hashtable size is SortMem kilobytes, but not less than
253-
* sqrt(estimated inner rel size), so as to avoid horrible
254-
* performance.
255-
*/
256-
hash_table_bytes=sqrt(inner_rel_bytes);
257-
if (hash_table_bytes< (SortMem*1024L))
258-
hash_table_bytes=SortMem*1024L;
259-
260-
/*
261-
* Count the number of hash buckets we want for the whole relation,
262-
* for an average bucket load of NTUP_PER_BUCKET (per virtual
263-
* bucket!).
264-
*/
265-
totalbuckets= (int)ceil(ntuples*FUDGE_FAC /NTUP_PER_BUCKET);
266-
267-
/*
268-
* Count the number of buckets we think will actually fit in the
269-
* target memory size, at a loading of NTUP_PER_BUCKET (physical
270-
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
271-
* hashtable space reserved to allow for nonuniform distribution of
272-
* hash values. Perhaps this should be a different number from the
273-
* other uses of FUDGE_FAC, but since we have no real good way to pick
274-
* either one...
275-
*/
276-
bucketsize=NTUP_PER_BUCKET*tupsize;
277-
nbuckets= (int) (hash_table_bytes / (bucketsize*FUDGE_FAC));
278-
if (nbuckets <=0)
279-
nbuckets=1;
280227

281-
if (totalbuckets <=nbuckets)
282-
{
228+
ExecChooseHashTableSize(outerNode->plan_rows,outerNode->plan_width,
229+
&totalbuckets,&nbuckets,&nbatch);
283230

284-
/*
285-
* We have enough space, so no batching. In theory we could even
286-
* reduce nbuckets, but since that could lead to poor behavior if
287-
* estimated ntuples is much less than reality, it seems better to
288-
* make more buckets instead of fewer.
289-
*/
290-
totalbuckets=nbuckets;
291-
nbatch=0;
292-
}
293-
else
294-
{
295-
296-
/*
297-
* Need to batch; compute how many batches we want to use. Note
298-
* that nbatch doesn't have to have anything to do with the ratio
299-
* totalbuckets/nbuckets; in fact, it is the number of groups we
300-
* will use for the part of the data that doesn't fall into the
301-
* first nbuckets hash buckets.
302-
*/
303-
nbatch= (int)ceil((inner_rel_bytes-hash_table_bytes) /
304-
hash_table_bytes);
305-
if (nbatch <=0)
306-
nbatch=1;
307-
}
308-
309-
/*
310-
* Now, totalbuckets is the number of (virtual) hashbuckets for the
311-
* whole relation, and nbuckets is the number of physical hashbuckets
312-
* we will use in the first pass. Data falling into the first
313-
* nbuckets virtual hashbuckets gets handled in the first pass;
314-
* everything else gets divided into nbatch batches to be processed in
315-
* additional passes.
316-
*/
317231
#ifdefHJDEBUG
318232
printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
319233
nbatch,totalbuckets,nbuckets);
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
407321
returnhashtable;
408322
}
409323

324+
325+
/*
326+
* Compute appropriate size for hashtable given the estimated size of the
327+
* relation to be hashed (number of rows and average row width).
328+
*
329+
* Caution: the input is only the planner's estimates, and so can't be
330+
* trusted too far. Apply a healthy fudge factor.
331+
*
332+
* This is exported so that the planner's costsize.c can use it.
333+
*/
334+
335+
/* Target bucket loading (tuples per bucket) */
336+
#defineNTUP_PER_BUCKET10
337+
/* Fudge factor to allow for inaccuracy of input estimates */
338+
#defineFUDGE_FAC2.0
339+
340+
void
341+
ExecChooseHashTableSize(doublentuples,inttupwidth,
342+
int*virtualbuckets,
343+
int*physicalbuckets,
344+
int*numbatches)
345+
{
346+
inttupsize;
347+
doubleinner_rel_bytes;
348+
doublehash_table_bytes;
349+
intnbatch;
350+
intnbuckets;
351+
inttotalbuckets;
352+
intbucketsize;
353+
354+
/* Force a plausible relation size if no info */
355+
if (ntuples <=0.0)
356+
ntuples=1000.0;
357+
358+
/*
359+
* Estimate tupsize based on footprint of tuple in hashtable... but
360+
* what about palloc overhead?
361+
*/
362+
tupsize=MAXALIGN(tupwidth)+MAXALIGN(sizeof(HashJoinTupleData));
363+
inner_rel_bytes=ntuples*tupsize*FUDGE_FAC;
364+
365+
/*
366+
* Target hashtable size is SortMem kilobytes, but not less than
367+
* sqrt(estimated inner rel size), so as to avoid horrible
368+
* performance.
369+
*/
370+
hash_table_bytes=sqrt(inner_rel_bytes);
371+
if (hash_table_bytes< (SortMem*1024L))
372+
hash_table_bytes=SortMem*1024L;
373+
374+
/*
375+
* Count the number of hash buckets we want for the whole relation,
376+
* for an average bucket load of NTUP_PER_BUCKET (per virtual
377+
* bucket!).
378+
*/
379+
totalbuckets= (int)ceil(ntuples*FUDGE_FAC /NTUP_PER_BUCKET);
380+
381+
/*
382+
* Count the number of buckets we think will actually fit in the
383+
* target memory size, at a loading of NTUP_PER_BUCKET (physical
384+
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
385+
* hashtable space reserved to allow for nonuniform distribution of
386+
* hash values. Perhaps this should be a different number from the
387+
* other uses of FUDGE_FAC, but since we have no real good way to pick
388+
* either one...
389+
*/
390+
bucketsize=NTUP_PER_BUCKET*tupsize;
391+
nbuckets= (int) (hash_table_bytes / (bucketsize*FUDGE_FAC));
392+
if (nbuckets <=0)
393+
nbuckets=1;
394+
395+
if (totalbuckets <=nbuckets)
396+
{
397+
/*
398+
* We have enough space, so no batching. In theory we could even
399+
* reduce nbuckets, but since that could lead to poor behavior if
400+
* estimated ntuples is much less than reality, it seems better to
401+
* make more buckets instead of fewer.
402+
*/
403+
totalbuckets=nbuckets;
404+
nbatch=0;
405+
}
406+
else
407+
{
408+
/*
409+
* Need to batch; compute how many batches we want to use. Note
410+
* that nbatch doesn't have to have anything to do with the ratio
411+
* totalbuckets/nbuckets; in fact, it is the number of groups we
412+
* will use for the part of the data that doesn't fall into the
413+
* first nbuckets hash buckets.
414+
*/
415+
nbatch= (int)ceil((inner_rel_bytes-hash_table_bytes) /
416+
hash_table_bytes);
417+
if (nbatch <=0)
418+
nbatch=1;
419+
}
420+
421+
/*
422+
* Now, totalbuckets is the number of (virtual) hashbuckets for the
423+
* whole relation, and nbuckets is the number of physical hashbuckets
424+
* we will use in the first pass. Data falling into the first
425+
* nbuckets virtual hashbuckets gets handled in the first pass;
426+
* everything else gets divided into nbatch batches to be processed in
427+
* additional passes.
428+
*/
429+
*virtualbuckets=totalbuckets;
430+
*physicalbuckets=nbuckets;
431+
*numbatches=nbatch;
432+
}
433+
434+
410435
/* ----------------------------------------------------------------
411436
*ExecHashTableDestroy
412437
*

‎src/backend/optimizer/path/costsize.c

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
*
4444
* IDENTIFICATION
45-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
45+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.77 2001/06/11 00:17:08 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
791791
* smart enough to figure out how the restrict clauses might change the
792792
* distribution, so this will have to do for now.
793793
*
794-
*The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
795-
*number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
796-
*a bucketsize fractionofNTUP_PER_BUCKET / ntuples. But that goal will
797-
*be reached only if the data values are uniformly distributed among the
798-
*buckets, which requires(a) at leastntuples / NTUP_PER_BUCKET distinct
799-
*data values, and (b)a not-too-skewed data distribution. Otherwise the
800-
*buckets willbe nonuniformly occupied. If the other relation in the join
801-
*has asimilardistribution,the most-loaded buckets are exactly those
802-
* that will be probed most often. Therefore, the "average" bucket size for
803-
* costing purposes should really be taken as something close to the "worst
804-
* case" bucket size. We try to estimate this byfirst scaling up if there
805-
* are too few distinct data values, and then scaling up again by the
806-
* ratio of the most common value's frequency to the average frequency.
794+
*We can get the number of buckets the executor will use for the given
795+
*input relation. If the data were perfectly distributed, with the same
796+
*numberoftuples going into each available bucket, then the bucketsize
797+
*fraction would be 1/nbuckets. But this happy state of affairs will occur
798+
*only if(a)there areat leastnbuckets distinct data values, and (b)
799+
*we havea not-too-skewed data distribution. Otherwise the buckets will
800+
* be nonuniformly occupied. If the other relation in the join has a key
801+
*distributionsimilarto this one's, thenthe most-loaded buckets are
802+
*exactly thosethat will be probed most often. Therefore, the "average"
803+
*bucket size forcosting purposes should really be taken as something close
804+
*to the "worstcase" bucket size. We try to estimate this byadjusting the
805+
*fraction if thereare too few distinct data values, and then scaling up
806+
*by theratio of the most common value's frequency to the average frequency.
807807
*
808808
* If no statistics are available, use a default estimate of 0.1. This will
809809
* discourage use of a hash rather strongly if the inner relation is large,
@@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
815815
{
816816
Oidrelid;
817817
RelOptInfo*rel;
818+
intvirtualbuckets;
819+
intphysicalbuckets;
820+
intnumbatches;
818821
HeapTupletuple;
819822
Form_pg_statisticstats;
820823
doubleestfract,
821824
ndistinct,
822-
needdistinct,
823825
mcvfreq,
824826
avgfreq;
825827
float4*numbers;
@@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
841843
if (rel->tuples <=0.0||rel->rows <=0.0)
842844
return0.1;/* ensure we can divide below */
843845

846+
/* Get hash table size that executor would use for this relation */
847+
ExecChooseHashTableSize(rel->rows,rel->width,
848+
&virtualbuckets,
849+
&physicalbuckets,
850+
&numbatches);
851+
844852
tuple=SearchSysCache(STATRELATT,
845853
ObjectIdGetDatum(relid),
846854
Int16GetDatum(var->varattno),
@@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
857865
caseObjectIdAttributeNumber:
858866
caseSelfItemPointerAttributeNumber:
859867
/* these are unique, so buckets should be well-distributed */
860-
return (double)NTUP_PER_BUCKET /rel->rows;
868+
return1.0 /(double)virtualbuckets;
861869
caseTableOidAttributeNumber:
862870
/* hashing this is a terrible idea... */
863871
return1.0;
@@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
873881
if (ndistinct<0.0)
874882
ndistinct=-ndistinct*rel->tuples;
875883

884+
if (ndistinct <=0.0)/* ensure we can divide */
885+
{
886+
ReleaseSysCache(tuple);
887+
return0.1;
888+
}
889+
876890
/* Also compute avg freq of all distinct data values in raw relation */
877891
avgfreq= (1.0-stats->stanullfrac) /ndistinct;
878892

@@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
887901
ndistinct *=rel->rows /rel->tuples;
888902

889903
/*
890-
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
891-
* ie the number of rows after applying restriction clauses, because
892-
* that's what the fraction will eventually be multiplied by in
893-
* cost_heapjoin.
904+
* Initial estimate of bucketsize fraction is 1/nbuckets as long as
905+
* the number of buckets is less than the expected number of distinct
906+
* values; otherwise it is 1/ndistinct.
894907
*/
895-
estfract= (double)NTUP_PER_BUCKET /rel->rows;
896-
897-
/*
898-
* Adjust estimated bucketsize if too few distinct values (after
899-
* restriction clauses) to fill all the buckets.
900-
*/
901-
needdistinct=rel->rows / (double)NTUP_PER_BUCKET;
902-
if (ndistinct<needdistinct)
903-
estfract *=needdistinct /ndistinct;
908+
if (ndistinct> (double)virtualbuckets)
909+
estfract=1.0 / (double)virtualbuckets;
910+
else
911+
estfract=1.0 /ndistinct;
904912

905913
/*
906914
* Look up the frequency of the most common value, if available.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp