Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0aba255

Browse files
committed
Add optimized C string hashing
Given an already-initialized hash state and a NUL-terminated string,accumulate the hash of the string into the hash state and return thelength for the caller to (optionally) save for the finalizer. Thisavoids a strlen call.If the string pointer is aligned, we can use a word-at-a-timealgorithm for NUL lookahead. The aligned case is only used on 64-bitplatforms, since it's not worth the extra complexity for 32-bit.Handling the tail of the string after finishing the word-wise loopwas inspired by NetBSD's strlen(), but no code was taken since thatis written in assembly language.As demonstration, use this in the search path cache. This brings thegeneral case performance closer to the special case optimization donein commita86c61c. There are other places that could benefit, butthat is left for future work.Jeff Davis and John NaylorReviewed by Heikki Linnakangas, Jian He, Junwang ZhaoDiscussion:https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.comDiscussion:https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
1 parente97b672 commit0aba255

File tree

2 files changed

+145
-5
lines changed

2 files changed

+145
-5
lines changed

‎src/backend/catalog/namespace.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
#include"catalog/pg_ts_template.h"
4242
#include"catalog/pg_type.h"
4343
#include"commands/dbcommands.h"
44-
#include"common/hashfn.h"
44+
#include"common/hashfn_unstable.h"
4545
#include"funcapi.h"
4646
#include"mb/pg_wchar.h"
4747
#include"miscadmin.h"
@@ -253,11 +253,21 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
253253
staticinlineuint32
254254
spcachekey_hash(SearchPathCacheKeykey)
255255
{
256-
constunsignedchar*bytes= (constunsignedchar*)key.searchPath;
257-
intblen=strlen(key.searchPath);
256+
fasthash_statehs;
257+
intsp_len;
258258

259-
returnhash_combine(hash_bytes(bytes,blen),
260-
hash_uint32(key.roleid));
259+
fasthash_init(&hs,FH_UNKNOWN_LENGTH,0);
260+
261+
hs.accum=key.roleid;
262+
fasthash_combine(&hs);
263+
264+
/*
265+
* Combine search path into the hash and save the length for tweaking the
266+
* final mix.
267+
*/
268+
sp_len=fasthash_accum_cstring(&hs,key.searchPath);
269+
270+
returnfasthash_final32(&hs,sp_len);
261271
}
262272

263273
staticinlinebool

‎src/include/common/hashfn_unstable.h

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,24 @@
5858
* 2) Incremental interface. This can used for incorporating multiple
5959
* inputs. The standalone functions use this internally, so see fasthash64()
6060
* for an an example of how this works.
61+
*
62+
* The incremental interface is especially useful if any of the inputs
63+
* are NUL-terminated C strings, since the length is not needed ahead
64+
* of time. This avoids needing to call strlen(). This case is optimized
65+
* in fasthash_accum_cstring() :
66+
*
67+
* fasthash_state hs;
68+
* fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
69+
* len = fasthash_accum_cstring(&hs, *str);
70+
* ...
71+
* return fasthash_final32(&hs, len);
72+
*
73+
* Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero
74+
* would zero out the internal seed as well. fasthash_accum_cstring()
75+
* returns the length of the string, which is computed on-the-fly while
76+
* mixing the string into the hash. Experimentation has found that
77+
* SMHasher fails unless we incorporate the length, so it is passed to
78+
* the finalizer as a tweak.
6179
*/
6280

6381

@@ -151,6 +169,118 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
151169
fasthash_combine(hs);
152170
}
153171

172+
/*
173+
* Set high bit in lowest byte where the input is zero, from:
174+
* https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
175+
*/
176+
#definehaszero64(v) \
177+
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
178+
179+
/*
180+
* all-purpose workhorse for fasthash_accum_cstring
181+
*/
182+
staticinlineint
183+
fasthash_accum_cstring_unaligned(fasthash_state*hs,constchar*str)
184+
{
185+
constchar*conststart=str;
186+
187+
while (*str)
188+
{
189+
intchunk_len=0;
190+
191+
while (chunk_len<FH_SIZEOF_ACCUM&&str[chunk_len]!='\0')
192+
chunk_len++;
193+
194+
fasthash_accum(hs,str,chunk_len);
195+
str+=chunk_len;
196+
}
197+
198+
returnstr-start;
199+
}
200+
201+
/*
202+
* specialized workhorse for fasthash_accum_cstring
203+
*
204+
* With an aligned pointer, we consume the string a word at a time.
205+
* Loading the word containing the NUL terminator cannot segfault since
206+
* allocation boundaries are suitably aligned.
207+
*/
208+
staticinlineint
209+
fasthash_accum_cstring_aligned(fasthash_state*hs,constchar*str)
210+
{
211+
constchar*conststart=str;
212+
intremainder;
213+
uint64zero_bytes_le;
214+
215+
Assert(PointerIsAligned(start,uint64));
216+
for (;;)
217+
{
218+
uint64chunk=*(uint64*)str;
219+
220+
/*
221+
* With little-endian representation, we can use this calculation,
222+
* which sets bits in the first byte in the result word that
223+
* corresponds to a zero byte in the original word. The rest of the
224+
* bytes are indeterminate, so cannot be used on big-endian machines
225+
* without either swapping or a bytewise check.
226+
*/
227+
#ifdefWORDS_BIGENDIAN
228+
zero_bytes_le=haszero64(pg_bswap(chunk));
229+
#else
230+
zero_bytes_le=haszero64(chunk);
231+
#endif
232+
if (zero_bytes_le)
233+
break;
234+
235+
hs->accum=chunk;
236+
fasthash_combine(hs);
237+
str+=FH_SIZEOF_ACCUM;
238+
}
239+
240+
/*
241+
* For the last word, only use bytes up to the NUL for the hash. Bytes
242+
* with set bits will be 0x80, so calculate the first occurrence of a zero
243+
* byte within the input word by counting the number of trailing (because
244+
* little-endian) zeros and dividing the result by 8.
245+
*/
246+
remainder=pg_rightmost_one_pos64(zero_bytes_le) /BITS_PER_BYTE;
247+
fasthash_accum(hs,str,remainder);
248+
str+=remainder;
249+
250+
returnstr-start;
251+
}
252+
253+
/*
254+
* Mix 'str' into the hash state and return the length of the string.
255+
*/
256+
staticinlineint
257+
fasthash_accum_cstring(fasthash_state*hs,constchar*str)
258+
{
259+
#ifSIZEOF_VOID_P >=8
260+
261+
intlen;
262+
#ifdefUSE_ASSERT_CHECKING
263+
intlen_check;
264+
fasthash_statehs_check;
265+
266+
memcpy(&hs_check,hs,sizeof(fasthash_state));
267+
len_check=fasthash_accum_cstring_unaligned(&hs_check,str);
268+
#endif
269+
if (PointerIsAligned(str,uint64))
270+
{
271+
len=fasthash_accum_cstring_aligned(hs,str);
272+
Assert(hs_check.hash==hs->hash&&len_check==len);
273+
returnlen;
274+
}
275+
#endif/* SIZEOF_VOID_P */
276+
277+
/*
278+
* It's not worth it to try to make the word-at-a-time optimization work
279+
* on 32-bit platforms.
280+
*/
281+
returnfasthash_accum_cstring_unaligned(hs,str);
282+
}
283+
154284
/*
155285
* The finalizer
156286
*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp