@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
-	double		hash_table_bytes;
+	long		hash_table_bytes;
+	double		dtmp;
 	int			nbatch;
 	int			nbuckets;
 	int			totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
 	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
+	 * Target in-memory hashtable size is SortMem kilobytes.
 	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
+	hash_table_bytes = SortMem * 1024L;
 
 	/*
 	 * Count the number of hash buckets we want for the whole relation,
 	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
+	 * bucket!).  It has to fit in an int, however.
 	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	if (dtmp < INT_MAX)
+		totalbuckets = (int) dtmp;
+	else
+		totalbuckets = INT_MAX;
+	if (totalbuckets <= 0)
+		totalbuckets = 1;
 
 	/*
 	 * Count the number of buckets we think will actually fit in the
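
The dtmp/INT_MAX dance above is needed because casting a double that
exceeds INT_MAX to int is undefined behavior in C, so the estimate must
be range-checked before the cast.  A minimal standalone sketch of the
same clamp pattern (clamp_to_int is an illustrative name, not from the
patch):

	#include <limits.h>
	#include <math.h>

	/* Round up a non-negative double estimate, clamped to [1, INT_MAX]. */
	static int
	clamp_to_int(double estimate)
	{
		double		d = ceil(estimate);
		int			result;

		if (d < INT_MAX)		/* the cast is only safe below INT_MAX */
			result = (int) d;
		else
			result = INT_MAX;	/* saturate rather than overflow */
		if (result <= 0)		/* force a sane minimum */
			result = 1;
		return result;
	}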
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	 * that nbatch doesn't have to have anything to do with the ratio
 	 * totalbuckets/nbuckets; in fact, it is the number of groups we
 	 * will use for the part of the data that doesn't fall into the
-	 * first nbuckets hash buckets.
+	 * first nbuckets hash buckets.  We try to set it to make all the
+	 * batches the same size.  But we have to keep nbatch small
+	 * enough to avoid integer overflow in ExecHashJoinGetBatch().
 	 */
-	nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-						hash_table_bytes);
+	dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+				hash_table_bytes);
+	if (dtmp < INT_MAX / totalbuckets)
+		nbatch = (int) dtmp;
+	else
+		nbatch = INT_MAX / totalbuckets;
 	if (nbatch <= 0)
 		nbatch = 1;
 }
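
The INT_MAX / totalbuckets cap exists because ExecHashJoinGetBatch()
forms the product nbatch * (bucketno - nbuckets) in int arithmetic.  A
sketch modeled on that computation (approximate shape, not the exact
function body) shows why the cap keeps it in range:

	/*
	 * Map a virtual bucket number to a batch number.  Callers only get
	 * here with totalbuckets > nbuckets when nbatch > 0.
	 */
	static int
	get_batch_sketch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
	{
		/* buckets below nbuckets are processed in memory, batch 0 */
		if (bucketno < nbuckets)
			return 0;

		/*
		 * Since bucketno < totalbuckets, the factor (bucketno - nbuckets)
		 * is less than totalbuckets; with nbatch <= INT_MAX / totalbuckets
		 * the product below therefore cannot exceed INT_MAX.
		 */
		return (nbatch * (bucketno - nbuckets)) /
			(totalbuckets - nbuckets) + 1;
	}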