Commit 8cce08f

Change NTUP_PER_BUCKET to 1 to improve hash join lookup speed.
Since this makes the bucket headers use ~10x as much memory, properly account for that memory when we figure out whether everything fits in work_mem. This might result in some cases that previously used only a single batch getting split into multiple batches, but it's unclear as yet whether we need defenses against that case, and if so, what the shape of those defenses should be.

It's worth noting that even in these edge cases, users should still be no worse off than they would have been last week, because commit 45f6240 saved a big pile of memory on exactly the same workloads.

Tomas Vondra, reviewed and somewhat revised by me.
1 parent 4ad2a54 · commit 8cce08f
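The heart of the accounting change is easiest to see in the ExecHashTableInsert hunk at the bottom of the diff below: the batch-split check now charges the bucket-header array (one pointer per bucket) against work_mem alongside the tuples themselves. Here is a minimal standalone sketch of that check; the struct and field names are stand-ins modeled on the executor's HashJoinTable, not the real definitions:

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for the executor's HashJoinTuple, which is a pointer type. */
typedef void *HashJoinTuple;

/* Sketch of the few HashJoinTable fields the check relies on. */
typedef struct HashTableSketch
{
    size_t  spaceUsed;      /* bytes of tuple data stored so far */
    size_t  spaceAllowed;   /* the work_mem budget, in bytes */
    int     nbuckets;       /* number of bucket headers allocated */
} HashTableSketch;

/*
 * Post-commit check: split into more batches once the tuples plus the
 * bucket-header array together exceed the budget.  Pre-commit, only
 * spaceUsed was compared against spaceAllowed.
 */
static bool
needs_more_batches(const HashTableSketch *ht)
{
    return ht->spaceUsed + (size_t) ht->nbuckets * sizeof(HashJoinTuple)
        > ht->spaceAllowed;
}

With NTUP_PER_BUCKET dropped from 10 to 1, the header array is roughly ten times larger than before, so leaving it out of the check could overrun work_mem substantially.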


src/backend/executor/nodeHash.c

Lines changed: 41 additions & 34 deletions
@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */

 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET         10
+#define NTUP_PER_BUCKET         1

 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
     int         tupsize;
     double      inner_rel_bytes;
+    long        bucket_bytes;
     long        hash_table_bytes;
     long        skew_table_bytes;
     long        max_pointers;
-    int         nbatch;
+    int         nbatch = 1;
     int         nbuckets;
-    int         i;
+    double      dbuckets;

     /* Force a plausible relation size if no info */
     if (ntuples <= 0.0)
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,

     /*
      * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-     * memory is filled.  Set nbatch to the smallest power of 2 that appears
-     * sufficient.  The Min() steps limit the results so that the pointer
-     * arrays we'll try to allocate do not exceed work_mem.
+     * memory is filled, assuming a single batch.  The Min() step limits the
+     * results so that the pointer arrays we'll try to allocate do not exceed
+     * work_mem.
      */
     max_pointers = (work_mem * 1024L) / sizeof(void *);
     /* also ensure we avoid integer overflow in nbatch and nbuckets */
     max_pointers = Min(max_pointers, INT_MAX / 2);
+    dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
+    dbuckets = Min(dbuckets, max_pointers);
+    nbuckets = Max((int) dbuckets, 1024);
+    nbuckets = 1 << my_log2(nbuckets);
+    bucket_bytes = sizeof(HashJoinTuple) * nbuckets;

-    if (inner_rel_bytes > hash_table_bytes)
+    /*
+     * If there's not enough space to store the projected number of tuples
+     * and the required bucket headers, we will need multiple batches.
+     */
+    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
     {
         /* We'll need multiple batches */
         long        lbuckets;
         double      dbatch;
         int         minbatch;
+        long        bucket_size;

-        lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
+        /*
+         * Estimate the number of buckets we'll want to have when work_mem
+         * is entirely full.  Each bucket will contain a bucket pointer plus
+         * NTUP_PER_BUCKET tuples, whose projected size already includes
+         * overhead for the hash code, pointer to the next tuple, etc.
+         */
+        bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
+        lbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
         lbuckets = Min(lbuckets, max_pointers);
         nbuckets = (int) lbuckets;
+        bucket_bytes = nbuckets * sizeof(HashJoinTuple);
+
+        /*
+         * Buckets are simple pointers to hashjoin tuples, while tupsize
+         * includes the pointer, hash code, and MinimalTupleData.  So buckets
+         * should never really exceed 25% of work_mem (even for
+         * NTUP_PER_BUCKET=1); except maybe for work_mem values that are
+         * not 2^N bytes, where we might get more because of doubling.
+         * So let's look for 50% here.
+         */
+        Assert(bucket_bytes <= hash_table_bytes / 2);

-        dbatch = ceil(inner_rel_bytes / hash_table_bytes);
+        /* Calculate required number of batches. */
+        dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
         dbatch = Min(dbatch, max_pointers);
         minbatch = (int) dbatch;
         nbatch = 2;
         while (nbatch < minbatch)
             nbatch <<= 1;
     }
-    else
-    {
-        /* We expect the hashtable to fit in memory */
-        double      dbuckets;
-
-        dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
-        dbuckets = Min(dbuckets, max_pointers);
-        nbuckets = (int) dbuckets;
-
-        nbatch = 1;
-    }
-
-    /*
-     * Both nbuckets and nbatch must be powers of 2 to make
-     * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
-     * nbuckets to the next larger power of 2.  We also force nbuckets to not
-     * be real small, by starting the search at 2^10.  (Note: above we made
-     * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
-     * overflow, nor can the final shift to recalculate nbuckets.)
-     */
-    i = 10;
-    while ((1 << i) < nbuckets)
-        i++;
-    nbuckets = (1 << i);

     *numbuckets = nbuckets;
     *numbatches = nbatch;
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
         hashtable->spaceUsed += hashTupleSize;
         if (hashtable->spaceUsed > hashtable->spacePeak)
             hashtable->spacePeak = hashtable->spaceUsed;
-        if (hashtable->spaceUsed > hashtable->spaceAllowed)
+        if (hashtable->spaceUsed + hashtable->nbuckets * sizeof(HashJoinTuple)
+            > hashtable->spaceAllowed)
             ExecHashIncreaseNumBatches(hashtable);
     }
     else
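To see how the new arithmetic can split a previously single-batch join, here is a hedged, self-contained re-creation of the nbuckets/nbatch computation from ExecChooseHashTableSize above. It omits the max_pointers and skew-table clamps and the 1024-bucket floor for brevity; my_log2 mirrors PostgreSQL's ceil-log2 helper, and the inputs in main are invented for illustration:

#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 1

/* ceil(log2(num)), mirroring PostgreSQL's my_log2() */
static int
my_log2(long num)
{
    int     i = 0;
    long    limit = 1;

    while (limit < num)
    {
        i++;
        limit <<= 1;
    }
    return i;
}

int
main(void)
{
    /* Invented inputs: 20,000 inner tuples of 48 bytes each, 1 MB work_mem */
    double  ntuples = 20000.0;
    int     tupsize = 48;
    long    hash_table_bytes = 1024L * 1024L;
    double  inner_rel_bytes = ntuples * tupsize;
    long    bucket_bytes;
    long    nbuckets;
    long    nbatch = 1;

    /* Size the bucket array for a single batch, one tuple per bucket... */
    nbuckets = 1L << my_log2((long) ceil(ntuples / NTUP_PER_BUCKET));
    bucket_bytes = nbuckets * sizeof(void *);   /* 8 bytes/bucket on 64-bit */

    /* ...and charge it against work_mem along with the tuples themselves. */
    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
    {
        /* Multiple batches: rebudget the buckets for a full work_mem... */
        long    bucket_size = tupsize * NTUP_PER_BUCKET + sizeof(void *);

        nbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
        bucket_bytes = nbuckets * sizeof(void *);

        /* ...then divide the tuples by the space left after the headers. */
        nbatch = 2;
        while (nbatch < (long) ceil(inner_rel_bytes /
                                    (double) (hash_table_bytes - bucket_bytes)))
            nbatch <<= 1;
    }

    printf("nbuckets = %ld, nbatch = %ld\n", nbuckets, nbatch);
    return 0;
}

On a 64-bit build this prints nbuckets = 32768, nbatch = 2: the ~0.92 MB inner relation would previously have fit the 1 MB budget in a single batch, but charging the 256 kB bucket-header array tips it into two batches, which is exactly the edge case the commit message flags.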
