Commit 8cce08f

Change NTUP_PER_BUCKET to 1 to improve hash join lookup speed.
Since this makes the bucket headers use ~10x as much memory, properly account for that memory when we figure out whether everything fits in work_mem. This might result in some cases that previously used only a single batch getting split into multiple batches, but it's unclear as yet whether we need defenses against that case, and if so, what the shape of those defenses should be.

It's worth noting that even in these edge cases, users should still be no worse off than they would have been last week, because commit 45f6240 saved a big pile of memory on exactly the same workloads.

Tomas Vondra, reviewed and somewhat revised by me.
1 parent 4ad2a54 · commit 8cce08f
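The heart of the accounting change is easiest to see in the ExecHashTableInsert hunk at the bottom of the diff below: the batch-split check now charges the bucket-header array (one pointer per bucket) against work_mem alongside the tuples themselves. Here is a minimal standalone sketch of that check; the struct and field names are stand-ins modeled on the executor's HashJoinTable, not the real definitions:

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for the executor's HashJoinTuple, which is a pointer type. */
typedef void *HashJoinTuple;

/* Sketch of the few HashJoinTable fields the check relies on. */
typedef struct HashTableSketch
{
    size_t  spaceUsed;      /* bytes of tuple data stored so far */
    size_t  spaceAllowed;   /* the work_mem budget, in bytes */
    int     nbuckets;       /* number of bucket headers allocated */
} HashTableSketch;

/*
 * Post-commit check: split into more batches once the tuples plus the
 * bucket-header array together exceed the budget.  Pre-commit, only
 * spaceUsed was compared against spaceAllowed.
 */
static bool
needs_more_batches(const HashTableSketch *ht)
{
    return ht->spaceUsed + (size_t) ht->nbuckets * sizeof(HashJoinTuple)
        > ht->spaceAllowed;
}

With NTUP_PER_BUCKET dropped from 10 to 1, the header array is roughly ten times larger than before, so leaving it out of the check could overrun work_mem substantially.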


src/backend/executor/nodeHash.c

Lines changed: 41 additions & 34 deletions
@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */

 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET         10
+#define NTUP_PER_BUCKET         1

 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
     int         tupsize;
     double      inner_rel_bytes;
+    long        bucket_bytes;
     long        hash_table_bytes;
     long        skew_table_bytes;
     long        max_pointers;
-    int         nbatch;
+    int         nbatch = 1;
     int         nbuckets;
-    int         i;
+    double      dbuckets;

     /* Force a plausible relation size if no info */
     if (ntuples <= 0.0)
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,

     /*
      * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-     * memory is filled.  Set nbatch to the smallest power of 2 that appears
-     * sufficient.  The Min() steps limit the results so that the pointer
-     * arrays we'll try to allocate do not exceed work_mem.
+     * memory is filled, assuming a single batch.  The Min() step limits the
+     * results so that the pointer arrays we'll try to allocate do not exceed
+     * work_mem.
      */
     max_pointers = (work_mem * 1024L) / sizeof(void *);
     /* also ensure we avoid integer overflow in nbatch and nbuckets */
     max_pointers = Min(max_pointers, INT_MAX / 2);
+    dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
+    dbuckets = Min(dbuckets, max_pointers);
+    nbuckets = Max((int) dbuckets, 1024);
+    nbuckets = 1 << my_log2(nbuckets);
+    bucket_bytes = sizeof(HashJoinTuple) * nbuckets;

-    if (inner_rel_bytes > hash_table_bytes)
+    /*
+     * If there's not enough space to store the projected number of tuples
+     * and the required bucket headers, we will need multiple batches.
+     */
+    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
     {
         /* We'll need multiple batches */
         long        lbuckets;
         double      dbatch;
         int         minbatch;
+        long        bucket_size;

-        lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
+        /*
+         * Estimate the number of buckets we'll want to have when work_mem
+         * is entirely full.  Each bucket will contain a bucket pointer plus
+         * NTUP_PER_BUCKET tuples, whose projected size already includes
+         * overhead for the hash code, pointer to the next tuple, etc.
+         */
+        bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
+        lbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
         lbuckets = Min(lbuckets, max_pointers);
         nbuckets = (int) lbuckets;
+        bucket_bytes = nbuckets * sizeof(HashJoinTuple);
+
+        /*
+         * Buckets are simple pointers to hashjoin tuples, while tupsize
+         * includes the pointer, hash code, and MinimalTupleData.  So buckets
+         * should never really exceed 25% of work_mem (even for
+         * NTUP_PER_BUCKET=1); except maybe for work_mem values that are
+         * not 2^N bytes, where we might get more because of doubling.
+         * So let's look for 50% here.
+         */
+        Assert(bucket_bytes <= hash_table_bytes / 2);

-        dbatch = ceil(inner_rel_bytes / hash_table_bytes);
+        /* Calculate required number of batches. */
+        dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
         dbatch = Min(dbatch, max_pointers);
         minbatch = (int) dbatch;
         nbatch = 2;
         while (nbatch < minbatch)
             nbatch <<= 1;
     }
-    else
-    {
-        /* We expect the hashtable to fit in memory */
-        double      dbuckets;
-
-        dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
-        dbuckets = Min(dbuckets, max_pointers);
-        nbuckets = (int) dbuckets;
-
-        nbatch = 1;
-    }
-
-    /*
-     * Both nbuckets and nbatch must be powers of 2 to make
-     * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
-     * nbuckets to the next larger power of 2.  We also force nbuckets to not
-     * be real small, by starting the search at 2^10.  (Note: above we made
-     * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
-     * overflow, nor can the final shift to recalculate nbuckets.)
-     */
-    i = 10;
-    while ((1 << i) < nbuckets)
-        i++;
-    nbuckets = (1 << i);

     *numbuckets = nbuckets;
     *numbatches = nbatch;
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
         hashtable->spaceUsed += hashTupleSize;
         if (hashtable->spaceUsed > hashtable->spacePeak)
             hashtable->spacePeak = hashtable->spaceUsed;
-        if (hashtable->spaceUsed > hashtable->spaceAllowed)
+        if (hashtable->spaceUsed + hashtable->nbuckets * sizeof(HashJoinTuple)
+            > hashtable->spaceAllowed)
             ExecHashIncreaseNumBatches(hashtable);
     }
     else
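To see how the new arithmetic can split a previously single-batch join, here is a hedged, self-contained re-creation of the nbuckets/nbatch computation from ExecChooseHashTableSize above. It omits the max_pointers and skew-table clamps and the 1024-bucket floor for brevity; my_log2 mirrors PostgreSQL's ceil-log2 helper, and the inputs in main are invented for illustration:

#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 1

/* ceil(log2(num)), mirroring PostgreSQL's my_log2() */
static int
my_log2(long num)
{
    int     i = 0;
    long    limit = 1;

    while (limit < num)
    {
        i++;
        limit <<= 1;
    }
    return i;
}

int
main(void)
{
    /* Invented inputs: 20,000 inner tuples of 48 bytes each, 1 MB work_mem */
    double  ntuples = 20000.0;
    int     tupsize = 48;
    long    hash_table_bytes = 1024L * 1024L;
    double  inner_rel_bytes = ntuples * tupsize;
    long    bucket_bytes;
    long    nbuckets;
    long    nbatch = 1;

    /* Size the bucket array for a single batch, one tuple per bucket... */
    nbuckets = 1L << my_log2((long) ceil(ntuples / NTUP_PER_BUCKET));
    bucket_bytes = nbuckets * sizeof(void *);   /* 8 bytes/bucket on 64-bit */

    /* ...and charge it against work_mem along with the tuples themselves. */
    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
    {
        /* Multiple batches: rebudget the buckets for a full work_mem... */
        long    bucket_size = tupsize * NTUP_PER_BUCKET + sizeof(void *);

        nbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
        bucket_bytes = nbuckets * sizeof(void *);

        /* ...then divide the tuples by the space left after the headers. */
        nbatch = 2;
        while (nbatch < (long) ceil(inner_rel_bytes /
                                    (double) (hash_table_bytes - bucket_bytes)))
            nbatch <<= 1;
    }

    printf("nbuckets = %ld, nbatch = %ld\n", nbuckets, nbatch);
    return 0;
}

On a 64-bit build this prints nbuckets = 32768, nbatch = 2: the ~0.92 MB inner relation would previously have fit the 1 MB budget in a single batch, but charging the 256 kB bucket-header array tips it into two batches, which is exactly the edge case the commit message flags.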
