Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit8205258

Browse files
committed
Adopt Bob Jenkins' improved hash function for hash_any(). This changes the
contents of hash indexes (again), so bump catversion.Kenneth Marshall
1 parent834a6da commit8205258

File tree

5 files changed

+163
-111
lines changed

5 files changed

+163
-111
lines changed

‎src/backend/access/hash/hashfunc.c

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.58 2009/02/09 21:18:28 tgl Exp $
1212
*
1313
* NOTES
1414
* These functions are stored in pg_amproc.For each operator class
@@ -200,39 +200,95 @@ hashvarlena(PG_FUNCTION_ARGS)
200200
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
201201
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
202202
*
203-
* In the current code, we have adoptedan idea fromBob's 2006 update
204-
*of his hashfunction, which isto fetch the data a word at a time when
205-
*it is suitably aligned.This makes for a useful speedup, at the cost
206-
*of having to maintainfour code paths (aligned vs unaligned, and
207-
*little-endian vs big-endian). Note that we have NOT adopted his newer
208-
*mix() function, which is faster but may sacrifice some randomness.
203+
* In the current code, we have adopted Bob's 2006 update of his hash
204+
* functionto fetch the data a word at a time when it is suitably aligned.
205+
* This makes for a useful speedup, at the cost of having to maintain
206+
* four code paths (aligned vs unaligned, and little-endian vs big-endian).
207+
*It also uses two separate mixing functions mix() and final(), instead
208+
*of a slower multi-purpose function.
209209
*/
210210

211211
/* Get a bit mask of the bits set in non-uint32 aligned addresses */
212212
#defineUINT32_ALIGN_MASK (sizeof(uint32) - 1)
213213

214+
/* Rotate a uint32 value left by k bits - note multiple evaluation! */
215+
#definerot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
216+
214217
/*----------
215218
* mix -- mix 3 32-bit values reversibly.
216-
* For every delta with one or two bits set, and the deltas of all three
217-
* high bits or all three low bits, whether the original value of a,b,c
218-
* is almost all zero or is uniformly distributed,
219-
* - If mix() is run forward or backward, at least 32 bits in a,b,c
220-
* have at least 1/4 probability of changing.
221-
* - If mix() is run forward, every bit of c will change between 1/3 and
222-
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
219+
*
220+
* This is reversible, so any information in (a,b,c) before mix() is
221+
* still in (a,b,c) after mix().
222+
*
223+
* If four pairs of (a,b,c) inputs are run through mix(), or through
224+
* mix() in reverse, there are at least 32 bits of the output that
225+
* are sometimes the same for one pair and different for another pair.
226+
* This was tested for:
227+
* * pairs that differed by one bit, by two bits, in any combination
228+
* of top bits of (a,b,c), or in any combination of bottom bits of
229+
* (a,b,c).
230+
* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
231+
* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
232+
* is commonly produced by subtraction) look like a single 1-bit
233+
* difference.
234+
* * the base values were pseudorandom, all zero but one bit set, or
235+
* all zero plus a counter that starts at zero.
236+
*
237+
* This does not achieve avalanche. There are input bits of (a,b,c)
238+
* that fail to affect some output bits of (a,b,c), especially of a. The
239+
* most thoroughly mixed value is c, but it doesn't really even achieve
240+
* avalanche in c.
241+
*
242+
* This allows some parallelism. Read-after-writes are good at doubling
243+
* the number of bits affected, so the goal of mixing pulls in the opposite
244+
* direction from the goal of parallelism. I did what I could. Rotates
245+
* seem to cost as much as shifts on every machine I could lay my hands on,
246+
* and rotates are much kinder to the top and bottom bits, so I used rotates.
223247
*----------
224248
*/
225249
#definemix(a,b,c) \
226250
{ \
227-
a -= b; a -= c; a ^= ((c)>>13); \
228-
b -= c; b -= a; b ^= ((a)<<8); \
229-
c -= a; c -= b; c ^= ((b)>>13); \
230-
a -= b; a -= c; a ^= ((c)>>12); \
231-
b -= c; b -= a; b ^= ((a)<<16); \
232-
c -= a; c -= b; c ^= ((b)>>5); \
233-
a -= b; a -= c; a ^= ((c)>>3);\
234-
b -= c; b -= a; b ^= ((a)<<10); \
235-
c -= a; c -= b; c ^= ((b)>>15); \
251+
a -= c; a ^= rot(c, 4); c += b; \
252+
b -= a; b ^= rot(a, 6); a += c; \
253+
c -= b; c ^= rot(b, 8); b += a; \
254+
a -= c; a ^= rot(c,16); c += b; \
255+
b -= a; b ^= rot(a,19); a += c; \
256+
c -= b; c ^= rot(b, 4); b += a; \
257+
}
258+
259+
/*----------
260+
* final -- final mixing of 3 32-bit values (a,b,c) into c
261+
*
262+
* Pairs of (a,b,c) values differing in only a few bits will usually
263+
* produce values of c that look totally different. This was tested for
264+
* * pairs that differed by one bit, by two bits, in any combination
265+
* of top bits of (a,b,c), or in any combination of bottom bits of
266+
* (a,b,c).
267+
* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
268+
* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
269+
* is commonly produced by subtraction) look like a single 1-bit
270+
* difference.
271+
* * the base values were pseudorandom, all zero but one bit set, or
272+
* all zero plus a counter that starts at zero.
273+
*
274+
* The use of separate functions for mix() and final() allow for a
275+
* substantial performance increase since final() does not need to
276+
* do well in reverse, but is does need to affect all output bits.
277+
* mix(), on the other hand, does not need to affect all output
278+
* bits (affecting 32 bits is enough). The original hash function had
279+
* a single mixing operation that had to satisfy both sets of requirements
280+
* and was slower as a result.
281+
*----------
282+
*/
283+
#definefinal(a,b,c) \
284+
{ \
285+
c ^= b; c -= rot(b,14); \
286+
a ^= c; a -= rot(c,11); \
287+
b ^= a; b -= rot(a,25); \
288+
c ^= b; c -= rot(b,16); \
289+
a ^= c; a -= rot(c, 4); \
290+
b ^= a; b -= rot(a,14); \
291+
c ^= b; c -= rot(b,24); \
236292
}
237293

238294
/*
@@ -260,8 +316,7 @@ hash_any(register const unsigned char *k, register int keylen)
260316

261317
/* Set up the internal state */
262318
len=keylen;
263-
a=b=0x9e3779b9;/* the golden ratio; an arbitrary value */
264-
c=3923095;/* initialize with an arbitrary value */
319+
a=b=c=0x9e3779b9+len+3923095;
265320

266321
/* If the source pointer is word-aligned, we use word-wide fetches */
267322
if (((long)k&UINT32_ALIGN_MASK)==0)
@@ -282,7 +337,6 @@ hash_any(register const unsigned char *k, register int keylen)
282337

283338
/* handle the last 11 bytes */
284339
k= (constunsignedchar*)ka;
285-
c+=keylen;
286340
#ifdefWORDS_BIGENDIAN
287341
switch (len)
288342
{
@@ -385,7 +439,6 @@ hash_any(register const unsigned char *k, register int keylen)
385439
}
386440

387441
/* handle the last 11 bytes */
388-
c+=keylen;
389442
#ifdefWORDS_BIGENDIAN
390443
switch (len)/* all the case statements fall through */
391444
{
@@ -445,7 +498,7 @@ hash_any(register const unsigned char *k, register int keylen)
445498
#endif/* WORDS_BIGENDIAN */
446499
}
447500

448-
mix(a,b,c);
501+
final(a,b,c);
449502

450503
/* report the result */
451504
returnUInt32GetDatum(c);
@@ -465,11 +518,10 @@ hash_uint32(uint32 k)
465518
b,
466519
c;
467520

468-
a=0x9e3779b9+k;
469-
b=0x9e3779b9;
470-
c=3923095+ (uint32)sizeof(uint32);
521+
a=b=c=0x9e3779b9+ (uint32)sizeof(uint32)+3923095;
522+
a+=k;
471523

472-
mix(a,b,c);
524+
final(a,b,c);
473525

474526
/* report the result */
475527
returnUInt32GetDatum(c);

‎src/include/catalog/catversion.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
3838
* Portions Copyright (c) 1994, Regents of the University of California
3939
*
40-
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.522 2009/02/0920:57:59 alvherre Exp $
40+
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.523 2009/02/0921:18:28 tgl Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO200902091
56+
#defineCATALOG_VERSION_NO200902092
5757

5858
#endif

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp