@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */
 
 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET         10
+#define NTUP_PER_BUCKET         1
 
 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
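
The headline change in this first hunk is the tuples-per-bucket target dropping from 10 to 1: the planner now sizes the bucket array so that, on average, a probe walks a chain of about one tuple instead of ten, at the cost of a pointer array roughly ten times larger. A back-of-the-envelope sketch of that trade-off (illustrative numbers only, not executor code):

#include <math.h>
#include <stdio.h>

/* Rough model: buckets get rounded up to a power of 2, so the average
 * chain length a probe scans is about ntuples / nbuckets, which the
 * planner steers toward NTUP_PER_BUCKET. */
int
main(void)
{
    double  ntuples = 1e6;
    int     targets[] = {10, 1};    /* old and new NTUP_PER_BUCKET */

    for (int i = 0; i < 2; i++)
    {
        int     nbuckets = 1 << (int) ceil(log2(ntuples / targets[i]));

        printf("NTUP_PER_BUCKET=%-2d -> %7d buckets, ~%.2f tuples/bucket\n",
               targets[i], nbuckets, ntuples / nbuckets);
    }
    return 0;
}
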
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
     int         tupsize;
     double      inner_rel_bytes;
+    long        bucket_bytes;
     long        hash_table_bytes;
     long        skew_table_bytes;
     long        max_pointers;
-    int         nbatch;
+    int         nbatch = 1;
     int         nbuckets;
-    int         i;
+    double      dbuckets;
 
     /* Force a plausible relation size if no info */
     if (ntuples <= 0.0)
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 
     /*
      * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-     * memory is filled.  Set nbatch to the smallest power of 2 that appears
-     * sufficient.  The Min() steps limit the results so that the pointer
-     * arrays we'll try to allocate do not exceed work_mem.
+     * memory is filled, assuming a single batch.  The Min() step limits the
+     * results so that the pointer arrays we'll try to allocate do not exceed
+     * work_mem.
      */
     max_pointers = (work_mem * 1024L) / sizeof(void *);
     /* also ensure we avoid integer overflow in nbatch and nbuckets */
     max_pointers = Min(max_pointers, INT_MAX / 2);
+    dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
+    dbuckets = Min(dbuckets, max_pointers);
+    nbuckets = Max((int) dbuckets, 1024);
+    nbuckets = 1 << my_log2(nbuckets);
+    bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 
-    if (inner_rel_bytes > hash_table_bytes)
+    /*
+     * If there's not enough space to store the projected number of tuples
+     * and the required bucket headers, we will need multiple batches.
+     */
+    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
     {
         /* We'll need multiple batches */
         long        lbuckets;
         double      dbatch;
         int         minbatch;
+        long        bucket_size;
 
-        lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
+        /*
+         * Estimate the number of buckets we'll want to have when work_mem
+         * is entirely full.  Each bucket will contain a bucket pointer plus
+         * NTUP_PER_BUCKET tuples, whose projected size already includes
+         * overhead for the hash code, pointer to the next tuple, etc.
+         */
+        bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
+        lbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
         lbuckets = Min(lbuckets, max_pointers);
         nbuckets = (int) lbuckets;
+        bucket_bytes = nbuckets * sizeof(HashJoinTuple);
+
+        /*
+         * Buckets are simple pointers to hashjoin tuples, while tupsize
+         * includes the pointer, hash code, and MinimalTupleData.  So buckets
+         * should never really exceed 25% of work_mem (even for
+         * NTUP_PER_BUCKET=1); except maybe for work_mem values that are not
+         * 2^N bytes, where we might get more because of doubling.  So let's
+         * look for 50% here.
+         */
+        Assert(bucket_bytes <= hash_table_bytes / 2);
 
-        dbatch = ceil(inner_rel_bytes / hash_table_bytes);
+        /* Calculate required number of batches. */
+        dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
         dbatch = Min(dbatch, max_pointers);
         minbatch = (int) dbatch;
         nbatch = 2;
         while (nbatch < minbatch)
             nbatch <<= 1;
     }
-    else
-    {
-        /* We expect the hashtable to fit in memory */
-        double      dbuckets;
-
-        dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
-        dbuckets = Min(dbuckets, max_pointers);
-        nbuckets = (int) dbuckets;
-
-        nbatch = 1;
-    }
-
-    /*
-     * Both nbuckets and nbatch must be powers of 2 to make
-     * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
-     * nbuckets to the next larger power of 2.  We also force nbuckets to not
-     * be real small, by starting the search at 2^10.  (Note: above we made
-     * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
-     * overflow, nor can the final shift to recalculate nbuckets.)
-     */
-    i = 10;
-    while ((1 << i) < nbuckets)
-        i++;
-    nbuckets = (1 << i);
 
     *numbuckets = nbuckets;
     *numbatches = nbatch;
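
Note what the big hunk above deletes at its tail: the explicit "inflate nbuckets to the next larger power of 2" loop. Both paths now get that rounding from 1 << my_log2(...), where my_log2() is PostgreSQL's ceil-log2 helper. The power-of-2 requirement exists so ExecHashGetBucketAndBatch() can carve the bucket and batch numbers out of disjoint bit ranges of the hash value with a mask and a shift. A minimal stand-alone sketch of that scheme, with my_log2 re-created here:

#include <stdint.h>
#include <stdio.h>

/* Ceil-log2, mirroring PostgreSQL's my_log2(): smallest i with 2^i >= num. */
static int
my_log2(long num)
{
    int     i = 0;
    long    limit = 1;

    while (limit < num)
    {
        i++;
        limit <<= 1;
    }
    return i;
}

/* With nbuckets and nbatch both powers of 2, the bucket number is the low
 * bits of the hash value and the batch number is the next bits up, roughly
 * as in the executor's ExecHashGetBucketAndBatch(). */
static void
get_bucket_and_batch(uint32_t hashvalue, int nbuckets, int nbatch,
                     int *bucketno, int *batchno)
{
    *bucketno = hashvalue & (nbuckets - 1);
    *batchno = (hashvalue >> my_log2(nbuckets)) & (nbatch - 1);
}

int
main(void)
{
    int     bucketno,
            batchno;

    get_bucket_and_batch(0xDEADBEEF, 1024, 4, &bucketno, &batchno);
    printf("bucket %d, batch %d\n", bucketno, batchno);
    return 0;
}
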
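And a stripped-down sketch of the new sizing logic as a whole, handy for playing with the numbers: it drops the skew table and the max_pointers clamps, and uses sizeof(void *) in place of sizeof(HashJoinTuple) for the bucket headers, so treat choose_size() as an approximation of ExecChooseHashTableSize(), not a copy:

#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 1

static int
my_log2(long num)               /* same ceil-log2 helper as above */
{
    int     i = 0;
    long    limit = 1;

    while (limit < num)
    {
        i++;
        limit <<= 1;
    }
    return i;
}

static void
choose_size(double ntuples, int tupsize, long hash_table_bytes,
            int *numbuckets, int *numbatches)
{
    double  inner_rel_bytes = ntuples * tupsize;
    int     nbatch = 1;
    int     nbuckets;
    long    bucket_bytes;

    /* Single-batch hope: one bucket per tuple, at least 1024 buckets. */
    nbuckets = (int) ceil(ntuples / NTUP_PER_BUCKET);
    if (nbuckets < 1024)
        nbuckets = 1024;
    nbuckets = 1 << my_log2(nbuckets);
    bucket_bytes = sizeof(void *) * nbuckets;

    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
    {
        /* Multiple batches: size buckets for a completely full work_mem,
         * then charge their headers against the per-batch tuple budget. */
        long    bucket_size = tupsize * NTUP_PER_BUCKET + sizeof(void *);
        int     minbatch;

        nbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
        bucket_bytes = nbuckets * sizeof(void *);

        minbatch = (int) ceil(inner_rel_bytes /
                              (hash_table_bytes - bucket_bytes));
        nbatch = 2;
        while (nbatch < minbatch)
            nbatch <<= 1;
    }

    *numbuckets = nbuckets;
    *numbatches = nbatch;
}

int
main(void)
{
    int     nbuckets,
            nbatch;

    /* 1M inner tuples of ~64 bytes against a 4MB hash-table budget */
    choose_size(1000000.0, 64, 4L * 1024 * 1024, &nbuckets, &nbatch);
    printf("nbuckets = %d, nbatch = %d\n", nbuckets, nbatch);
    return 0;
}
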
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
         hashtable->spaceUsed += hashTupleSize;
         if (hashtable->spaceUsed > hashtable->spacePeak)
             hashtable->spacePeak = hashtable->spaceUsed;
-        if (hashtable->spaceUsed > hashtable->spaceAllowed)
+        if (hashtable->spaceUsed + hashtable->nbuckets * sizeof(HashJoinTuple)
+            > hashtable->spaceAllowed)
             ExecHashIncreaseNumBatches(hashtable);
     }
     else
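
This last hunk is the runtime counterpart of the planner change: with NTUP_PER_BUCKET = 1 the bucket array itself eats a meaningful slice of work_mem, so ExecHashTableInsert() now charges nbuckets * sizeof(HashJoinTuple) against spaceAllowed when deciding whether to double the batch count; otherwise the table could overshoot its budget before ExecHashIncreaseNumBatches() ever fired. A toy comparison of the two tests, with made-up numbers:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    long    space_allowed = 4L * 1024 * 1024;        /* 4MB budget */
    long    bucket_array = 65536L * sizeof(void *);  /* 512 kB on 64-bit */
    long    space_used = space_allowed - 300 * 1024; /* tuple bytes so far */

    bool    old_test = space_used > space_allowed;
    bool    new_test = space_used + bucket_array > space_allowed;

    /* The old test ignores the bucket headers, so here it would keep
     * inserting; the new test triggers a batch increase first. */
    printf("old test: %s, new test: %s\n",
           old_test ? "grow" : "keep inserting",
           new_test ? "grow" : "keep inserting");
    return 0;
}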