NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commitca5b42d

committed

Fix some issues in new hashtable size calculations in nodeHash.c.

Limit the size of the hashtable pointer array to not more thanMaxAllocSize, per reports from Kouhei Kaigai and others of "invalid memoryalloc request size" failures. There was discussion of allowing the arrayto get larger than that by using the "huge" palloc API, but so far no proofthat that is actually a good idea, and at this point in the 9.5 cycle majorchanges from old behavior don't seem like the way to go.Fix a rather serious secondary bug in the new code, which was that itdidn't ensure nbuckets remained a power of 2 when recomputing it for themultiple-batch case.Clean up sloppy division of labor between ExecHashIncreaseNumBuckets andits sole call site.

1 parent544ccf6 commitca5b42dCopy full SHA for ca5b42d

File tree

2 files changed

+31

-31

lines changed

src
- backend/executor
  - nodeHash.c
- include/executor
  - hashjoin.h

2 files changed

+31

-31

lines changed

`‎src/backend/executor/nodeHash.c‎`

Lines changed: 30 additions & 30 deletions

Original file line number	Diff line number	Diff line change
`@@ -131,17 +131,7 @@ MultiExecHash(HashState *node)`
`131`	`131`
`132`	`132`	`/* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */`
`133`	`133`	`if (hashtable->nbuckets!=hashtable->nbuckets_optimal)`
`134`		`-{`
`135`		`-/* We never decrease the number of buckets. */`
`136`		`-Assert(hashtable->nbuckets_optimal>hashtable->nbuckets);`
`137`		`-`
`138`		`-#ifdefHJDEBUG`
`139`		`-printf("Increasing nbuckets %d => %d\n",`
`140`		`-hashtable->nbuckets,hashtable->nbuckets_optimal);`
`141`		`-#endif`
`142`		`-`
`143`	`134`	`ExecHashIncreaseNumBuckets(hashtable);`
`144`		`-}`
`145`	`135`
`146`	`136`	`/* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */`
`147`	`137`	`hashtable->spaceUsed+=hashtable->nbuckets*sizeof(HashJoinTuple);`
`@@ -486,23 +476,31 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,`
`486`	`476`
`487`	`477`	`/*`
`488`	`478`	`* Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when`
`489`		`- * memory is filled, assuming a single batch. The Min() step limits the`
`490`		`- * results so that the pointer arrays we'll try to allocate do not exceed`
`491`		`- * work_mem.`
	`479`	`+ * memory is filled, assuming a single batch; but limit the value so that`
	`480`	`+ * the pointer arrays we'll try to allocate do not exceed work_mem nor`
	`481`	`+ * MaxAllocSize.`
	`482`	`+ *`
	`483`	`+ * Note that both nbuckets and nbatch must be powers of 2 to make`
	`484`	`+ * ExecHashGetBucketAndBatch fast.`
`492`	`485`	`*/`
`493`		`-max_pointers= (work_mem1024L) /sizeof(void);`
	`486`	`+max_pointers= (work_mem*1024L) /sizeof(HashJoinTuple);`
	`487`	`+max_pointers=Min(max_pointers,MaxAllocSize /sizeof(HashJoinTuple));`
`494`	`488`	`/* also ensure we avoid integer overflow in nbatch and nbuckets */`
	`489`	`+/* (this step is redundant given the current value of MaxAllocSize) */`
`495`	`490`	`max_pointers=Min(max_pointers,INT_MAX /2);`
	`491`	`+`
`496`	`492`	`dbuckets=ceil(ntuples /NTUP_PER_BUCKET);`
`497`	`493`	`dbuckets=Min(dbuckets,max_pointers);`
	`494`	`+/* don't let nbuckets be really small, though ... */`
`498`	`495`	`nbuckets=Max((int)dbuckets,1024);`
	`496`	`+/* ... and force it to be a power of 2. */`
`499`	`497`	`nbuckets=1 <<my_log2(nbuckets);`
`500`		`-bucket_bytes=sizeof(HashJoinTuple)*nbuckets;`
`501`	`498`
`502`	`499`	`/*`
`503`	`500`	`* If there's not enough space to store the projected number of tuples and`
`504`	`501`	`* the required bucket headers, we will need multiple batches.`
`505`	`502`	`*/`
	`503`	`+bucket_bytes=sizeof(HashJoinTuple)*nbuckets;`
`506`	`504`	`if (inner_rel_bytes+bucket_bytes>hash_table_bytes)`
`507`	`505`	`{`
`508`	`506`	`/* We'll need multiple batches */`
`@@ -521,6 +519,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,`
`521`	`519`	`lbuckets=1L <<my_log2(hash_table_bytes /bucket_size);`
`522`	`520`	`lbuckets=Min(lbuckets,max_pointers);`
`523`	`521`	`nbuckets= (int)lbuckets;`
	`522`	`+nbuckets=1 <<my_log2(nbuckets);`
`524`	`523`	`bucket_bytes=nbuckets*sizeof(HashJoinTuple);`
`525`	`524`
`526`	`525`	`/*`
`@@ -760,21 +759,18 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)`
`760`	`759`	`if (hashtable->nbuckets >=hashtable->nbuckets_optimal)`
`761`	`760`	`return;`
`762`	`761`
`763`		`-/*`
`764`		`- * We already know the optimal number of buckets, so let's just compute`
`765`		`- * the log2_nbuckets for it.`
`766`		`- */`
	`762`	`+#ifdefHJDEBUG`
	`763`	`+printf("Increasing nbuckets %d => %d\n",`
	`764`	`+hashtable->nbuckets,hashtable->nbuckets_optimal);`
	`765`	`+#endif`
	`766`	`+`
`767`	`767`	`hashtable->nbuckets=hashtable->nbuckets_optimal;`
`768`		`-hashtable->log2_nbuckets=my_log2(hashtable->nbuckets_optimal);`
	`768`	`+hashtable->log2_nbuckets=hashtable->log2_nbuckets_optimal;`
`769`	`769`
`770`	`770`	`Assert(hashtable->nbuckets>1);`
`771`	`771`	`Assert(hashtable->nbuckets <= (INT_MAX /2));`
`772`	`772`	`Assert(hashtable->nbuckets== (1 <<hashtable->log2_nbuckets));`
`773`	`773`
`774`		`-#ifdefHJDEBUG`
`775`		`-printf("Increasing nbuckets to %d\n",hashtable->nbuckets);`
`776`		`-#endif`
`777`		`-`
`778`	`774`	`/*`
`779`	`775`	`* Just reallocate the proper number of buckets - we don't need to walk`
`780`	`776`	`* through them - we can walk the dense-allocated chunks (just like in`
`@@ -785,7 +781,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)`
`785`	`781`	`(HashJoinTuple*)repalloc(hashtable->buckets,`
`786`	`782`	`hashtable->nbuckets*sizeof(HashJoinTuple));`
`787`	`783`
`788`		`-memset(hashtable->buckets,0,sizeof(void)hashtable->nbuckets);`
	`784`	`+memset(hashtable->buckets,0,hashtable->nbuckets*sizeof(HashJoinTuple));`
`789`	`785`
`790`	`786`	`/* scan through all tuples in all chunks to rebuild the hash table */`
`791`	`787`	`for (chunk=hashtable->chunks;chunk!=NULL;chunk=chunk->next)`
`@@ -878,12 +874,16 @@ ExecHashTableInsert(HashJoinTable hashtable,`
`878`	`874`	`* NTUP_PER_BUCKET threshold, but only when there's still a single`
`879`	`875`	`* batch.`
`880`	`876`	`*/`
`881`		`-if ((hashtable->nbatch==1)&&`
`882`		`-(hashtable->nbuckets_optimal <=INT_MAX /2)&&/* overflow protection */`
`883`		`-(ntuples >= (hashtable->nbuckets_optimal*NTUP_PER_BUCKET)))`
	`877`	`+if (hashtable->nbatch==1&&`
	`878`	`+ntuples> (hashtable->nbuckets_optimal*NTUP_PER_BUCKET))`
`884`	`879`	`{`
`885`		`-hashtable->nbuckets_optimal *=2;`
`886`		`-hashtable->log2_nbuckets_optimal+=1;`
	`880`	`+/* Guard against integer overflow and alloc size overflow */`
	`881`	`+if (hashtable->nbuckets_optimal <=INT_MAX /2&&`
	`882`	`+hashtable->nbuckets_optimal*2 <=MaxAllocSize /sizeof(HashJoinTuple))`
	`883`	`+{`
	`884`	`+hashtable->nbuckets_optimal *=2;`
	`885`	`+hashtable->log2_nbuckets_optimal+=1;`
	`886`	`+}`
`887`	`887`	`}`
`888`	`888`
`889`	`889`	`/* Account for space used, and back off if we've used too much */`

`‎src/include/executor/hashjoin.h‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -131,7 +131,7 @@ typedef struct HashJoinTableData`
`131`	`131`	`intnbuckets_original;/* # buckets when starting the first`
`132`	`132`	`* hash */`
`133`	`133`	`intnbuckets_optimal;/* optimal # buckets (per batch) */`
`134`		`-intlog2_nbuckets_optimal;/same as log2_nbuckets optimal /`
	`134`	`+intlog2_nbuckets_optimal;/log2(nbuckets_optimal) /`
`135`	`135`
`136`	`136`	`/* buckets[i] is head of list of tuples in i'th in-memory bucket */`
`137`	`137`	`structHashJoinTupleData**buckets;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitca5b42d

File tree

2 files changed

2 files changed

`‎src/backend/executor/nodeHash.c‎`

`‎src/include/executor/hashjoin.h‎`

0 commit comments