  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.58 2009/02/09 21:18:28 tgl Exp $
  *
  * NOTES
  *    These functions are stored in pg_amproc.  For each operator class
@@ -200,39 +200,95 @@ hashvarlena(PG_FUNCTION_ARGS)
  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
  *
- * In the current code, we have adopted an idea from Bob's 2006 update
- * of his hash function, which is to fetch the data a word at a time when
- * it is suitably aligned.  This makes for a useful speedup, at the cost
- * of having to maintain four code paths (aligned vs unaligned, and
- * little-endian vs big-endian).  Note that we have NOT adopted his newer
- * mix() function, which is faster but may sacrifice some randomness.
+ * In the current code, we have adopted Bob's 2006 update of his hash
+ * function to fetch the data a word at a time when it is suitably aligned.
+ * This makes for a useful speedup, at the cost of having to maintain
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+ * It also uses two separate mixing functions mix() and final(), instead
+ * of a slower multi-purpose function.
  */
 
 /* Get a bit mask of the bits set in non-uint32 aligned addresses */
 #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
 
+/* Rotate a uint32 value left by k bits - note multiple evaluation! */
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
 /*----------
  * mix -- mix 3 32-bit values reversibly.
- * For every delta with one or two bits set, and the deltas of all three
- * high bits or all three low bits, whether the original value of a,b,c
- * is almost all zero or is uniformly distributed,
- * - If mix() is run forward or backward, at least 32 bits in a,b,c
- *   have at least 1/4 probability of changing.
- * - If mix() is run forward, every bit of c will change between 1/3 and
- *   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ *
+ * This is reversible, so any information in (a,b,c) before mix() is
+ * still in (a,b,c) after mix().
+ *
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
+ * mix() in reverse, there are at least 32 bits of the output that
+ * are sometimes the same for one pair and different for another pair.
+ * This was tested for:
+ * * pairs that differed by one bit, by two bits, in any combination
+ *   of top bits of (a,b,c), or in any combination of bottom bits of
+ *   (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+ *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ *   is commonly produced by subtraction) looks like a single 1-bit
+ *   difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ *   all zero plus a counter that starts at zero.
+ *
+ * This does not achieve avalanche.  There are input bits of (a,b,c)
+ * that fail to affect some output bits of (a,b,c), especially of a.  The
+ * most thoroughly mixed value is c, but it doesn't really even achieve
+ * avalanche in c.
+ *
+ * This allows some parallelism.  Read-after-writes are good at doubling
+ * the number of bits affected, so the goal of mixing pulls in the opposite
+ * direction from the goal of parallelism.  I did what I could.  Rotates
+ * seem to cost as much as shifts on every machine I could lay my hands on,
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
  *----------
  */
 #define mix(a,b,c) \
 { \
-  a -= b; a -= c; a ^= ((c)>>13); \
-  b -= c; b -= a; b ^= ((a)<<8); \
-  c -= a; c -= b; c ^= ((b)>>13); \
-  a -= b; a -= c; a ^= ((c)>>12); \
-  b -= c; b -= a; b ^= ((a)<<16); \
-  c -= a; c -= b; c ^= ((b)>>5); \
-  a -= b; a -= c; a ^= ((c)>>3); \
-  b -= c; b -= a; b ^= ((a)<<10); \
-  c -= a; c -= b; c ^= ((b)>>15); \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+/*----------
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
+ *
+ * Pairs of (a,b,c) values differing in only a few bits will usually
+ * produce values of c that look totally different.  This was tested for
+ * * pairs that differed by one bit, by two bits, in any combination
+ *   of top bits of (a,b,c), or in any combination of bottom bits of
+ *   (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+ *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ *   is commonly produced by subtraction) looks like a single 1-bit
+ *   difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ *   all zero plus a counter that starts at zero.
+ *
+ * The use of separate functions for mix() and final() allows for a
+ * substantial performance increase since final() does not need to
+ * do well in reverse, but it does need to affect all output bits.
+ * mix(), on the other hand, does not need to affect all output
+ * bits (affecting 32 bits is enough).  The original hash function had
+ * a single mixing operation that had to satisfy both sets of requirements
+ * and was slower as a result.
+ *----------
+ */
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c, 4); \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
 }
 
 /*
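The comment blocks above describe the division of labor: mix() does fast, reversible bulk mixing while final() supplies the avalanche into c at the end, and rot() must never be handed an argument with side effects because it evaluates it twice. The standalone sketch below illustrates that driving pattern on a deliberately simplified input (an array of uint32 words whose count is a multiple of three); demo_hash() and its word-array interface are illustrative only, and the sketch omits the alignment, endianness, and odd-length tail handling that the real hash_any() performs.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

#define mix(a,b,c) \
{ \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
}

#define final(a,b,c) \
{ \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c, 4); \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
}

/* Illustrative only: hash an array of uint32 words whose count is a multiple of 3. */
static uint32_t
demo_hash(const uint32_t *k, size_t nwords)
{
    uint32_t    a, b, c;

    /* seed all three state words alike, folding the key length in up front */
    a = b = c = 0x9e3779b9 + (uint32_t) (nwords * sizeof(uint32_t)) + 3923095;

    while (nwords >= 3)
    {
        a += k[0];
        b += k[1];
        c += k[2];
        mix(a, b, c);           /* reversible bulk mixing, three words per round */
        k += 3;
        nwords -= 3;
    }
    final(a, b, c);             /* final pass: drive the avalanche into c */
    return c;
}

int
main(void)
{
    uint32_t    data[6] = {1, 2, 3, 4, 5, 6};

    printf("%08x\n", (unsigned) demo_hash(data, 6));
    return 0;
}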
@@ -260,8 +316,7 @@ hash_any(register const unsigned char *k, register int keylen)
 
     /* Set up the internal state */
     len = keylen;
-    a = b = 0x9e3779b9;         /* the golden ratio; an arbitrary value */
-    c = 3923095;                /* initialize with an arbitrary value */
+    a = b = c = 0x9e3779b9 + len + 3923095;
 
     /* If the source pointer is word-aligned, we use word-wide fetches */
     if (((long) k & UINT32_ALIGN_MASK) == 0)
@@ -282,7 +337,6 @@ hash_any(register const unsigned char *k, register int keylen)
 
         /* handle the last 11 bytes */
         k = (const unsigned char *) ka;
-        c += keylen;
 #ifdef WORDS_BIGENDIAN
         switch (len)
         {
@@ -385,7 +439,6 @@ hash_any(register const unsigned char *k, register int keylen)
     }
 
     /* handle the last 11 bytes */
-    c += keylen;
 #ifdef WORDS_BIGENDIAN
     switch (len)            /* all the case statements fall through */
     {
@@ -445,7 +498,7 @@ hash_any(register const unsigned char *k, register int keylen)
 #endif   /* WORDS_BIGENDIAN */
     }
 
-    mix(a, b, c);
+    final(a, b, c);
 
     /* report the result */
     return UInt32GetDatum(c);
@@ -465,11 +518,10 @@ hash_uint32(uint32 k)
                 b,
                 c;
 
-    a = 0x9e3779b9 + k;
-    b = 0x9e3779b9;
-    c = 3923095 + (uint32) sizeof(uint32);
+    a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+    a += k;
 
-    mix(a, b, c);
+    final(a, b, c);
 
     /* report the result */
     return UInt32GetDatum(c);
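For a single 32-bit key, the patched hash_uint32() skips mix() entirely: it seeds (a,b,c) the same way hash_any() now does for a 4-byte key, folds the key into a, and runs final(). A minimal sketch of that shortcut, reusing the rot() and final() macros and headers from the sketch above (demo_hash_uint32() is an illustrative name, not a PostgreSQL function):

/* Illustrative only: the single-word shortcut taken by the patched hash_uint32(). */
static uint32_t
demo_hash_uint32(uint32_t k)
{
    uint32_t    a, b, c;

    /* golden-ratio constant + key length (4 bytes) + the same arbitrary value */
    a = b = c = 0x9e3779b9 + (uint32_t) sizeof(uint32_t) + 3923095;
    a += k;

    final(a, b, c);
    return c;
}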