Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitbd2c980

Browse files
committed
Buy back some of the cycles spent in more-expensive hash functions by
selecting power-of-2, rather than prime, numbers of buckets in hash joins.If the hash functions are doing their jobs properly by making all hash bitsequally random, this is good enough, and it saves expensive integer divisionand modulus operations.
1 parent1f559b7 commitbd2c980

File tree

2 files changed

+30
-30
lines changed

2 files changed

+30
-30
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.111 2007/02/22 22:49:27 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.112 2007/06/01 17:38:44 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -31,6 +31,7 @@
3131
#include"executor/nodeHashjoin.h"
3232
#include"miscadmin.h"
3333
#include"parser/parse_expr.h"
34+
#include"utils/dynahash.h"
3435
#include"utils/memutils.h"
3536
#include"utils/lsyscache.h"
3637

@@ -223,6 +224,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
223224
Plan*outerNode;
224225
intnbuckets;
225226
intnbatch;
227+
intlog2_nbuckets;
226228
intnkeys;
227229
inti;
228230
ListCell*ho;
@@ -242,6 +244,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
242244
printf("nbatch = %d, nbuckets = %d\n",nbatch,nbuckets);
243245
#endif
244246

247+
/* nbuckets must be a power of 2 */
248+
log2_nbuckets=my_log2(nbuckets);
249+
Assert(nbuckets== (1 <<log2_nbuckets));
250+
245251
/*
246252
* Initialize the hash table control block.
247253
*
@@ -250,6 +256,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
250256
*/
251257
hashtable= (HashJoinTable)palloc(sizeof(HashJoinTableData));
252258
hashtable->nbuckets=nbuckets;
259+
hashtable->log2_nbuckets=log2_nbuckets;
253260
hashtable->buckets=NULL;
254261
hashtable->nbatch=nbatch;
255262
hashtable->curbatch=0;
@@ -345,13 +352,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
345352
/* Target bucket loading (tuples per bucket) */
346353
#defineNTUP_PER_BUCKET10
347354

348-
/* Prime numbers that we like to use as nbuckets values */
349-
staticconstinthprimes[]= {
350-
1033,2063,4111,8219,16417,32779,65539,131111,
351-
262151,524341,1048589,2097211,4194329,8388619,16777289,33554473,
352-
67108913,134217773,268435463,536870951,1073741831
353-
};
354-
355355
void
356356
ExecChooseHashTableSize(doublentuples,inttupwidth,
357357
int*numbuckets,
@@ -396,7 +396,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
396396
intminbatch;
397397

398398
lbuckets= (hash_table_bytes /tupsize) /NTUP_PER_BUCKET;
399-
lbuckets=Min(lbuckets,INT_MAX);
399+
lbuckets=Min(lbuckets,INT_MAX /2);
400400
nbuckets= (int)lbuckets;
401401

402402
dbatch=ceil(inner_rel_bytes /hash_table_bytes);
@@ -412,27 +412,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
412412
doubledbuckets;
413413

414414
dbuckets=ceil(ntuples /NTUP_PER_BUCKET);
415-
dbuckets=Min(dbuckets,INT_MAX);
415+
dbuckets=Min(dbuckets,INT_MAX /2);
416416
nbuckets= (int)dbuckets;
417417

418418
nbatch=1;
419419
}
420420

421421
/*
422-
* We want nbuckets to be prime so as to avoid having bucket and batch
423-
* numbers depend on only some bits of the hash code. Choose the next
424-
* larger prime from the list in hprimes[]. (This also enforces that
425-
* nbuckets is not very small, by the simple expedient of not putting any
426-
* very small entries in hprimes[].)
422+
* Both nbuckets and nbatch must be powers of 2 to make
423+
* ExecHashGetBucketAndBatch fast. We already fixed nbatch; now inflate
424+
* nbuckets to the next larger power of 2. We also force nbuckets to not
425+
* be real small, by starting the search at 2^10.
427426
*/
428-
for (i=0;i< (int)lengthof(hprimes);i++)
429-
{
430-
if (hprimes[i] >=nbuckets)
431-
{
432-
nbuckets=hprimes[i];
433-
break;
434-
}
435-
}
427+
i=10;
428+
while ((1 <<i)<nbuckets)
429+
i++;
430+
nbuckets= (1 <<i);
436431

437432
*numbuckets=nbuckets;
438433
*numbatches=nbatch;
@@ -765,8 +760,11 @@ ExecHashGetHashValue(HashJoinTable hashtable,
765760
* increase. Our algorithm is
766761
*bucketno = hashvalue MOD nbuckets
767762
*batchno = (hashvalue DIV nbuckets) MOD nbatch
768-
* where nbuckets should preferably be prime so that all bits of the
769-
* hash value can affect both bucketno and batchno.
763+
* where nbuckets and nbatch are both expected to be powers of 2, so we can
764+
* do the computations by shifting and masking. (This assumes that all hash
765+
* functions are good about randomizing all their output bits, else we are
766+
* likely to have very skewed bucket or batch occupancy.)
767+
*
770768
* nbuckets doesn't change over the course of the join.
771769
*
772770
* nbatch is always a power of 2; we increase it only by doubling it. This
@@ -783,13 +781,13 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
783781

784782
if (nbatch>1)
785783
{
786-
*bucketno=hashvalue %nbuckets;
787-
/* since nbatch is a power of 2, can do MOD by masking */
788-
*batchno= (hashvalue/nbuckets)& (nbatch-1);
784+
/* we can do MOD by masking, DIV by shifting */
785+
*bucketno=hashvalue& (nbuckets-1);
786+
*batchno= (hashvalue>>hashtable->log2_nbuckets)& (nbatch-1);
789787
}
790788
else
791789
{
792-
*bucketno=hashvalue%nbuckets;
790+
*bucketno=hashvalue& (nbuckets-1);
793791
*batchno=0;
794792
}
795793
}

‎src/include/executor/hashjoin.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.44 2007/01/30 01:33:36 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.45 2007/06/01 17:38:44 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -76,6 +76,8 @@ typedef struct HashJoinTupleData
7676
typedefstructHashJoinTableData
7777
{
7878
intnbuckets;/* # buckets in the in-memory hash table */
79+
intlog2_nbuckets;/* its log2 (nbuckets must be a power of 2) */
80+
7981
/* buckets[i] is head of list of tuples in i'th in-memory bucket */
8082
structHashJoinTupleData**buckets;
8183
/* buckets array is per-batch storage, as are all the tuples */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp