  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.58 2009/02/09 21:18:28 tgl Exp $
  *
  * NOTES
  *    These functions are stored in pg_amproc.  For each operator class
@@ -200,39 +200,95 @@ hashvarlena(PG_FUNCTION_ARGS)
  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
  *
- * In the current code, we have adopted an idea from Bob's 2006 update
- * of his hash function, which is to fetch the data a word at a time when
- * it is suitably aligned.  This makes for a useful speedup, at the cost
- * of having to maintain four code paths (aligned vs unaligned, and
- * little-endian vs big-endian).  Note that we have NOT adopted his newer
- * mix() function, which is faster but may sacrifice some randomness.
+ * In the current code, we have adopted Bob's 2006 update of his hash
+ * function to fetch the data a word at a time when it is suitably aligned.
+ * This makes for a useful speedup, at the cost of having to maintain
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+ * It also uses two separate mixing functions mix() and final(), instead
+ * of a slower multi-purpose function.
  */
 
 /* Get a bit mask of the bits set in non-uint32 aligned addresses */
 #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
 
+/* Rotate a uint32 value left by k bits - note multiple evaluation! */
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
 /*----------
  * mix -- mix 3 32-bit values reversibly.
- * For every delta with one or two bits set, and the deltas of all three
- * high bits or all three low bits, whether the original value of a,b,c
- * is almost all zero or is uniformly distributed,
- * - If mix() is run forward or backward, at least 32 bits in a,b,c
- *   have at least 1/4 probability of changing.
- * - If mix() is run forward, every bit of c will change between 1/3 and
- *   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ *
+ * This is reversible, so any information in (a,b,c) before mix() is
+ * still in (a,b,c) after mix().
+ *
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
+ * mix() in reverse, there are at least 32 bits of the output that
+ * are sometimes the same for one pair and different for another pair.
+ * This was tested for:
+ * * pairs that differed by one bit, by two bits, in any combination
+ *   of top bits of (a,b,c), or in any combination of bottom bits of
+ *   (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+ *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ *   is commonly produced by subtraction) looks like a single 1-bit
+ *   difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ *   all zero plus a counter that starts at zero.
+ *
+ * This does not achieve avalanche.  There are input bits of (a,b,c)
+ * that fail to affect some output bits of (a,b,c), especially of a.  The
+ * most thoroughly mixed value is c, but it doesn't really even achieve
+ * avalanche in c.
+ *
+ * This allows some parallelism.  Read-after-writes are good at doubling
+ * the number of bits affected, so the goal of mixing pulls in the opposite
+ * direction from the goal of parallelism.  I did what I could.  Rotates
+ * seem to cost as much as shifts on every machine I could lay my hands on,
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
  *----------
  */
 #define mix(a,b,c) \
 { \
-  a -= b; a -= c; a ^= ((c)>>13); \
-  b -= c; b -= a; b ^= ((a)<<8); \
-  c -= a; c -= b; c ^= ((b)>>13); \
-  a -= b; a -= c; a ^= ((c)>>12); \
-  b -= c; b -= a; b ^= ((a)<<16); \
-  c -= a; c -= b; c ^= ((b)>>5); \
-  a -= b; a -= c; a ^= ((c)>>3); \
-  b -= c; b -= a; b ^= ((a)<<10); \
-  c -= a; c -= b; c ^= ((b)>>15); \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+/*----------
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
+ *
+ * Pairs of (a,b,c) values differing in only a few bits will usually
+ * produce values of c that look totally different.  This was tested for
+ * * pairs that differed by one bit, by two bits, in any combination
+ *   of top bits of (a,b,c), or in any combination of bottom bits of
+ *   (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+ *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ *   is commonly produced by subtraction) looks like a single 1-bit
+ *   difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ *   all zero plus a counter that starts at zero.
+ *
+ * The use of separate functions for mix() and final() allows for a
+ * substantial performance increase since final() does not need to
+ * do well in reverse, but it does need to affect all output bits.
+ * mix(), on the other hand, does not need to affect all output
+ * bits (affecting 32 bits is enough).  The original hash function had
+ * a single mixing operation that had to satisfy both sets of requirements
+ * and was slower as a result.
+ *----------
+ */
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c, 4); \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
 }
 
 /*
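The comment blocks above describe the division of labor: mix() does fast, reversible bulk mixing while final() supplies the avalanche into c at the end, and rot() must never be handed an argument with side effects because it evaluates it twice. The standalone sketch below illustrates that driving pattern on a deliberately simplified input (an array of uint32 words whose count is a multiple of three); demo_hash() and its word-array interface are illustrative only, and the sketch omits the alignment, endianness, and odd-length tail handling that the real hash_any() performs.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

#define mix(a,b,c) \
{ \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
}

#define final(a,b,c) \
{ \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c, 4); \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
}

/* Illustrative only: hash an array of uint32 words whose count is a multiple of 3. */
static uint32_t
demo_hash(const uint32_t *k, size_t nwords)
{
    uint32_t    a, b, c;

    /* seed all three state words alike, folding the key length in up front */
    a = b = c = 0x9e3779b9 + (uint32_t) (nwords * sizeof(uint32_t)) + 3923095;

    while (nwords >= 3)
    {
        a += k[0];
        b += k[1];
        c += k[2];
        mix(a, b, c);           /* reversible bulk mixing, three words per round */
        k += 3;
        nwords -= 3;
    }
    final(a, b, c);             /* final pass: drive the avalanche into c */
    return c;
}

int
main(void)
{
    uint32_t    data[6] = {1, 2, 3, 4, 5, 6};

    printf("%08x\n", (unsigned) demo_hash(data, 6));
    return 0;
}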
@@ -260,8 +316,7 @@ hash_any(register const unsigned char *k, register int keylen)
 
     /* Set up the internal state */
     len = keylen;
-    a = b = 0x9e3779b9;         /* the golden ratio; an arbitrary value */
-    c = 3923095;                /* initialize with an arbitrary value */
+    a = b = c = 0x9e3779b9 + len + 3923095;
 
     /* If the source pointer is word-aligned, we use word-wide fetches */
     if (((long) k & UINT32_ALIGN_MASK) == 0)
@@ -282,7 +337,6 @@ hash_any(register const unsigned char *k, register int keylen)
 
         /* handle the last 11 bytes */
         k = (const unsigned char *) ka;
-        c += keylen;
 #ifdef WORDS_BIGENDIAN
         switch (len)
         {
@@ -385,7 +439,6 @@ hash_any(register const unsigned char *k, register int keylen)
     }
 
     /* handle the last 11 bytes */
-    c += keylen;
 #ifdef WORDS_BIGENDIAN
     switch (len)            /* all the case statements fall through */
     {
@@ -445,7 +498,7 @@ hash_any(register const unsigned char *k, register int keylen)
 #endif   /* WORDS_BIGENDIAN */
     }
 
-    mix(a, b, c);
+    final(a, b, c);
 
     /* report the result */
     return UInt32GetDatum(c);
@@ -465,11 +518,10 @@ hash_uint32(uint32 k)
                 b,
                 c;
 
-    a = 0x9e3779b9 + k;
-    b = 0x9e3779b9;
-    c = 3923095 + (uint32) sizeof(uint32);
+    a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+    a += k;
 
-    mix(a, b, c);
+    final(a, b, c);
 
     /* report the result */
     return UInt32GetDatum(c);
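For a single 32-bit key, the patched hash_uint32() skips mix() entirely: it seeds (a,b,c) the same way hash_any() now does for a 4-byte key, folds the key into a, and runs final(). A minimal sketch of that shortcut, reusing the rot() and final() macros and headers from the sketch above (demo_hash_uint32() is an illustrative name, not a PostgreSQL function):

/* Illustrative only: the single-word shortcut taken by the patched hash_uint32(). */
static uint32_t
demo_hash_uint32(uint32_t k)
{
    uint32_t    a, b, c;

    /* golden-ratio constant + key length (4 bytes) + the same arbitrary value */
    a = b = c = 0x9e3779b9 + (uint32_t) sizeof(uint32_t) + 3923095;
    a += k;

    final(a, b, c);
    return c;
}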