Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc422b5c

Browse files
committed
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code indynahash hash tables and hash joins.
1 parent1eb31d1 commitc422b5c

File tree

11 files changed

+114
-190
lines changed

11 files changed

+114
-190
lines changed

‎src/backend/access/hash/hash.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
164164
Datum*datum= (Datum*)PG_GETARG_POINTER(1);
165165
char*nulls= (char*)PG_GETARG_POINTER(2);
166166
ItemPointerht_ctid= (ItemPointer)PG_GETARG_POINTER(3);
167+
#ifdefNOT_USED
168+
RelationheapRel= (Relation)PG_GETARG_POINTER(4);
169+
#endif
167170

168171
InsertIndexResultres;
169172
HashItemhitem;

‎src/backend/access/hash/hashfunc.c

Lines changed: 57 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
* NOTES
1414
* These functions are stored in pg_amproc.For each operator class
@@ -58,23 +58,23 @@ hashfloat4(PG_FUNCTION_ARGS)
5858
{
5959
float4key=PG_GETARG_FLOAT4(0);
6060

61-
returnhash_any((char*)&key,sizeof(key));
61+
returnhash_any((unsignedchar*)&key,sizeof(key));
6262
}
6363

6464
Datum
6565
hashfloat8(PG_FUNCTION_ARGS)
6666
{
6767
float8key=PG_GETARG_FLOAT8(0);
6868

69-
returnhash_any((char*)&key,sizeof(key));
69+
returnhash_any((unsignedchar*)&key,sizeof(key));
7070
}
7171

7272
Datum
7373
hashoidvector(PG_FUNCTION_ARGS)
7474
{
7575
Oid*key= (Oid*)PG_GETARG_POINTER(0);
7676

77-
returnhash_any((char*)key,INDEX_MAX_KEYS*sizeof(Oid));
77+
returnhash_any((unsignedchar*)key,INDEX_MAX_KEYS*sizeof(Oid));
7878
}
7979

8080
/*
@@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
8787
{
8888
int16*key= (int16*)PG_GETARG_POINTER(0);
8989

90-
returnhash_any((char*)key,INDEX_MAX_KEYS*sizeof(int16));
90+
returnhash_any((unsignedchar*)key,INDEX_MAX_KEYS*sizeof(int16));
9191
}
9292

9393
Datum
9494
hashname(PG_FUNCTION_ARGS)
9595
{
9696
char*key=NameStr(*PG_GETARG_NAME(0));
97+
intkeylen=strlen(key);
9798

98-
Assert(strlen(key) <=NAMEDATALEN);
99+
Assert(keylen<NAMEDATALEN);/* else it's not truncated correctly */
99100

100-
returnhash_any(key,strlen(key));
101+
returnhash_any((unsignedchar*)key,keylen);
101102
}
102103

103104
/*
@@ -110,21 +111,24 @@ hashvarlena(PG_FUNCTION_ARGS)
110111
structvarlena*key=PG_GETARG_VARLENA_P(0);
111112
Datumresult;
112113

113-
result=hash_any(VARDATA(key),VARSIZE(key)-VARHDRSZ);
114+
result=hash_any((unsignedchar*)VARDATA(key),
115+
VARSIZE(key)-VARHDRSZ);
114116

115117
/* Avoid leaking memory for toasted inputs */
116118
PG_FREE_IF_COPY(key,0);
117119

118120
returnresult;
119121
}
120122

121-
/* This hash function was written by Bob Jenkins
123+
/*
124+
* This hash function was written by Bob Jenkins
122125
* (bob_jenkins@burtleburtle.net), and superficially adapted
123126
* for PostgreSQL by Neil Conway. For more information on this
124-
* hash function, see http://burtleburtle.net/bob/hash/doobs.html
127+
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
128+
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
125129
*/
126130

127-
/*
131+
/*----------
128132
* mix -- mix 3 32-bit values reversibly.
129133
* For every delta with one or two bits set, and the deltas of all three
130134
* high bits or all three low bits, whether the original value of a,b,c
@@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
133137
* have at least 1/4 probability of changing.
134138
* - If mix() is run forward, every bit of c will change between 1/3 and
135139
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
140+
*----------
136141
*/
137142
#definemix(a,b,c) \
138143
{ \
@@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
151156
* hash_any() -- hash a variable-length key into a 32-bit value
152157
* k : the key (the unaligned variable-length array of bytes)
153158
* len : the length of the key, counting by bytes
154-
* Returns a 32-bit value. Every bit of the key affects every bit of
159+
*
160+
* Returns a uint32 value. Every bit of the key affects every bit of
155161
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
156162
* About 6*len+35 instructions. The best hash table sizes are powers
157163
* of 2. There is no need to do mod a prime (mod is sooo slow!).
158164
* If you need less than 32 bits, use a bitmask.
159165
*/
160166
Datum
161-
hash_any(registerconstchar*k, registerintkeylen)
167+
hash_any(registerconstunsignedchar*k, registerintkeylen)
162168
{
163-
registerDatuma,b,c,len;
164-
165-
/* Set up the internal state */
166-
len=keylen;
167-
a=b=0x9e3779b9;/* the golden ratio; an arbitrary value */
168-
/* Another arbitrary value. If the hash function is called
169-
* multiple times, this could be the previously generated
170-
* hash value; however, the interface currently doesn't allow
171-
* this. AFAIK this isn't a big deal.
172-
*/
173-
c=3923095;
174-
175-
/* handle most of the key */
176-
while (len >=12)
177-
{
178-
a+= (k[0]+((Datum)k[1]<<8)+((Datum)k[2]<<16)+((Datum)k[3]<<24));
179-
b+= (k[4]+((Datum)k[5]<<8)+((Datum)k[6]<<16)+((Datum)k[7]<<24));
180-
c+= (k[8]+((Datum)k[9]<<8)+((Datum)k[10]<<16)+((Datum)k[11]<<24));
181-
mix(a,b,c);
182-
k+=12;len-=12;
183-
}
184-
185-
/* handle the last 11 bytes */
186-
c+=keylen;
187-
switch(len)/* all the case statements fall through */
188-
{
189-
case11:c+=((Datum)k[10]<<24);
190-
case10:c+=((Datum)k[9]<<16);
191-
case9 :c+=((Datum)k[8]<<8);
192-
/* the first byte of c is reserved for the length */
193-
case8 :b+=((Datum)k[7]<<24);
194-
case7 :b+=((Datum)k[6]<<16);
195-
case6 :b+=((Datum)k[5]<<8);
196-
case5 :b+=k[4];
197-
case4 :a+=((Datum)k[3]<<24);
198-
case3 :a+=((Datum)k[2]<<16);
199-
case2 :a+=((Datum)k[1]<<8);
200-
case1 :a+=k[0];
201-
/* case 0: nothing left to add */
202-
}
203-
mix(a,b,c);
204-
/* report the result */
205-
returnc;
169+
registeruint32a,b,c,len;
170+
171+
/* Set up the internal state */
172+
len=keylen;
173+
a=b=0x9e3779b9;/* the golden ratio; an arbitrary value */
174+
c=3923095;/* initialize with an arbitrary value */
175+
176+
/* handle most of the key */
177+
while (len >=12)
178+
{
179+
a+= (k[0]+((uint32)k[1]<<8)+((uint32)k[2]<<16)+((uint32)k[3]<<24));
180+
b+= (k[4]+((uint32)k[5]<<8)+((uint32)k[6]<<16)+((uint32)k[7]<<24));
181+
c+= (k[8]+((uint32)k[9]<<8)+((uint32)k[10]<<16)+((uint32)k[11]<<24));
182+
mix(a,b,c);
183+
k+=12;len-=12;
184+
}
185+
186+
/* handle the last 11 bytes */
187+
c+=keylen;
188+
switch (len)/* all the case statements fall through */
189+
{
190+
case11:c+=((uint32)k[10]<<24);
191+
case10:c+=((uint32)k[9]<<16);
192+
case9 :c+=((uint32)k[8]<<8);
193+
/* the first byte of c is reserved for the length */
194+
case8 :b+=((uint32)k[7]<<24);
195+
case7 :b+=((uint32)k[6]<<16);
196+
case6 :b+=((uint32)k[5]<<8);
197+
case5 :b+=k[4];
198+
case4 :a+=((uint32)k[3]<<24);
199+
case3 :a+=((uint32)k[2]<<16);
200+
case2 :a+=((uint32)k[1]<<8);
201+
case1 :a+=k[0];
202+
/* case 0: nothing left to add */
203+
}
204+
mix(a,b,c);
205+
/* report the result */
206+
returnUInt32GetDatum(c);
206207
}

‎src/backend/executor/nodeHash.c

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
10-
*$Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
10+
*$Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -22,6 +22,7 @@
2222
#include<sys/types.h>
2323
#include<math.h>
2424

25+
#include"access/hash.h"
2526
#include"executor/execdebug.h"
2627
#include"executor/nodeHash.h"
2728
#include"executor/nodeHashjoin.h"
@@ -31,7 +32,7 @@
3132
#include"utils/lsyscache.h"
3233

3334

34-
staticinthashFunc(Datumkey,intlen,boolbyVal);
35+
staticuint32hashFunc(Datumkey,intlen,boolbyVal);
3536

3637
/* ----------------------------------------------------------------
3738
*ExecHash
@@ -553,7 +554,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
553554
bucketno=hashFunc(keyval,
554555
(int)hashtable->typLen,
555556
hashtable->typByVal)
556-
%hashtable->totalbuckets;
557+
%(uint32)hashtable->totalbuckets;
557558
}
558559

559560
#ifdefHJDEBUG
@@ -624,30 +625,29 @@ ExecScanHashBucket(HashJoinState *hjstate,
624625
/* ----------------------------------------------------------------
625626
*hashFunc
626627
*
627-
*the hash function, copied from Margo
628+
*the hash function for hash joins
628629
*
629630
*XXX this probably ought to be replaced with datatype-specific
630631
*hash functions, such as those already implemented for hash indexes.
631632
* ----------------------------------------------------------------
632633
*/
633-
staticint
634+
staticuint32
634635
hashFunc(Datumkey,intlen,boolbyVal)
635636
{
636-
unsignedinth=0;
637+
unsignedchar*k;
637638

638639
if (byVal)
639640
{
640641
/*
641-
* If it's a by-value data type, use the 'len' least significant
642-
* bytes of the Datum value. This should do the right thing on
643-
* either bigendian or littleendian hardware --- see the Datum
644-
* access macros in c.h.
642+
* If it's a by-value data type, just hash the whole Datum value.
643+
* This assumes that datatypes narrower than Datum are consistently
644+
* padded (either zero-extended or sign-extended, but not random
645+
* bits) to fill Datum; see the XXXGetDatum macros in postgres.h.
646+
* NOTE: it would not work to do hash_any(&key, len) since this
647+
* would get the wrong bytes on a big-endian machine.
645648
*/
646-
while (len-->0)
647-
{
648-
h= (h*PRIME1) ^ (key&0xFF);
649-
key >>=8;
650-
}
649+
k= (unsignedchar*)&key;
650+
len=sizeof(Datum);
651651
}
652652
else
653653
{
@@ -662,8 +662,6 @@ hashFunc(Datum key, int len, bool byVal)
662662
* freeing the detoasted copy; that happens for free when the
663663
* per-tuple memory context is reset in ExecHashGetBucket.)
664664
*/
665-
unsignedchar*k;
666-
667665
if (len<0)
668666
{
669667
structvarlena*vkey=PG_DETOAST_DATUM(key);
@@ -673,12 +671,9 @@ hashFunc(Datum key, int len, bool byVal)
673671
}
674672
else
675673
k= (unsignedchar*)DatumGetPointer(key);
676-
677-
while (len-->0)
678-
h= (h*PRIME1) ^ (*k++);
679674
}
680675

681-
returnh %PRIME2;
676+
returnDatumGetUInt32(hash_any(k,len));
682677
}
683678

684679
/* ----------------------------------------------------------------

‎src/backend/utils/adt/date.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1116,7 +1116,7 @@ timetz_hash(PG_FUNCTION_ARGS)
11161116
* sizeof(TimeTzADT), so that any garbage pad bytes in the structure
11171117
* won't be included in the hash!
11181118
*/
1119-
returnhash_any((char*)key,sizeof(double)+sizeof(int4));
1119+
returnhash_any((unsignedchar*)key,sizeof(double)+sizeof(int4));
11201120
}
11211121

11221122
Datum

‎src/backend/utils/adt/mac.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
*PostgreSQL type definitions for MAC addresses.
33
*
4-
*$Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $
4+
*$Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $
55
*/
66

77
#include"postgres.h"
@@ -230,7 +230,7 @@ hashmacaddr(PG_FUNCTION_ARGS)
230230
{
231231
macaddr*key=PG_GETARG_MACADDR_P(0);
232232

233-
returnhash_any((char*)key,sizeof(macaddr));
233+
returnhash_any((unsignedchar*)key,sizeof(macaddr));
234234
}
235235

236236
/*

‎src/backend/utils/adt/timestamp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1017,7 +1017,7 @@ interval_hash(PG_FUNCTION_ARGS)
10171017
* sizeof(Interval), so that any garbage pad bytes in the structure
10181018
* won't be included in the hash!
10191019
*/
1020-
returnhash_any((char*)key,sizeof(double)+sizeof(int4));
1020+
returnhash_any((unsignedchar*)key,sizeof(double)+sizeof(int4));
10211021
}
10221022

10231023
/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.

‎src/backend/utils/adt/varchar.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -791,7 +791,7 @@ hashbpchar(PG_FUNCTION_ARGS)
791791
keydata=VARDATA(key);
792792
keylen=bcTruelen(key);
793793

794-
result=hash_any(keydata,keylen);
794+
result=hash_any((unsignedchar*)keydata,keylen);
795795

796796
/* Avoid leaking memory for toasted inputs */
797797
PG_FREE_IF_COPY(key,0);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp