Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4864c8e

Browse files
committed
Use direct function calls for pg_popcount{32,64} on non-x86 platforms
Previously, all pg_popcount{32,64} calls were indirected through a function pointer, even though we had no fast implementation for non-x86 platforms. Instead, for those platforms use wrappers around the pg_popcount{32,64}_slow functions.
Review and additional hacking by David Rowley
Reviewed by Álvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/CAFBsxsE7otwnfA36Ly44zZO%2Bb7AEWHRFANxR1h1kxveEV%3DghLQ%40mail.gmail.com
1 parent ea499f3 commit 4864c8e

File tree

2 files changed

+53
-27
lines changed

2 files changed

+53
-27
lines changed

‎src/include/port/pg_bitutils.h

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -253,10 +253,40 @@ pg_ceil_log2_64(uint64 num)
253253
return pg_leftmost_one_pos64(num - 1) + 1;
254254
}
255255

256-
/* Count the number of one-bits in a uint32 or uint64 */
256+
/*
257+
* With MSVC on x86_64 builds, try using native popcnt instructions via the
258+
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
259+
* __builtin_popcount* intrinsic functions as they always emit popcnt
260+
* instructions.
261+
*/
262+
#if defined(_MSC_VER)&& defined(_M_AMD64)
263+
#define HAVE_X86_64_POPCNTQ
264+
#endif
265+
266+
/*
267+
* On x86_64, we can use the hardware popcount instruction, but only if
268+
* we can verify that the CPU supports it via the cpuid instruction.
269+
*
270+
* Otherwise, we fall back to a hand-rolled implementation.
271+
*/
272+
#ifdef HAVE_X86_64_POPCNTQ
273+
#if defined(HAVE__GET_CPUID)|| defined(HAVE__CPUID)
274+
#define TRY_POPCNT_FAST 1
275+
#endif
276+
#endif
277+
278+
#ifdef TRY_POPCNT_FAST
279+
/* Attempt to use the POPCNT instruction, but perform a runtime check first */
257280
extern int (*pg_popcount32) (uint32 word);
258281
extern int (*pg_popcount64) (uint64 word);
259282

283+
#else
284+
/* Use a portable implementation -- no need for a function pointer. */
285+
extern int pg_popcount32(uint32 word);
286+
extern int pg_popcount64(uint64 word);
287+
288+
#endif/* TRY_POPCNT_FAST */
289+
260290
/* Count the number of one-bits in a byte array */
261291
extern uint64 pg_popcount(const char *buf, int bytes);
262292

‎src/port/pg_bitutils.c

Lines changed: 22 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -103,29 +103,6 @@ const uint8 pg_number_of_ones[256] = {
103103
4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
104104
};
105105

106-
/*
107-
* With MSVC on x86_64 builds, try using native popcnt instructions via the
108-
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
109-
* __builtin_popcount* intrinsic functions as they always emit popcnt
110-
* instructions.
111-
*/
112-
#if defined(_MSC_VER)&& defined(_M_AMD64)
113-
#defineHAVE_X86_64_POPCNTQ
114-
#endif
115-
116-
/*
117-
* On x86_64, we can use the hardware popcount instruction, but only if
118-
* we can verify that the CPU supports it via the cpuid instruction.
119-
*
120-
* Otherwise, we fall back to __builtin_popcount if the compiler has that,
121-
* or a hand-rolled implementation if not.
122-
*/
123-
#ifdefHAVE_X86_64_POPCNTQ
124-
#if defined(HAVE__GET_CPUID)|| defined(HAVE__CPUID)
125-
#defineTRY_POPCNT_FAST 1
126-
#endif
127-
#endif
128-
129106
staticintpg_popcount32_slow(uint32word);
130107
staticintpg_popcount64_slow(uint64word);
131108

@@ -138,9 +115,6 @@ static intpg_popcount64_fast(uint64 word);
138115

139116
int(*pg_popcount32) (uint32word)=pg_popcount32_choose;
140117
int(*pg_popcount64) (uint64word)=pg_popcount64_choose;
141-
#else
142-
int(*pg_popcount32) (uint32word)=pg_popcount32_slow;
143-
int(*pg_popcount64) (uint64word)=pg_popcount64_slow;
144118
#endif/* TRY_POPCNT_FAST */
145119

146120
#ifdefTRY_POPCNT_FAST
@@ -291,6 +265,28 @@ pg_popcount64_slow(uint64 word)
291265
#endif/* HAVE__BUILTIN_POPCOUNT */
292266
}
293267

268+
#ifndef TRY_POPCNT_FAST
269+
270+
/*
271+
* When the POPCNT instruction is not available, there's no point in using
272+
* function pointers to vary the implementation between the fast and slow
273+
* method. We instead just make these actual external functions when
274+
* TRY_POPCNT_FAST is not defined. The compiler should be able to inline
275+
* the slow versions here.
276+
*/
277+
int
278+
pg_popcount32(uint32 word)
279+
{
280+
return pg_popcount32_slow(word);
281+
}
282+
283+
int
284+
pg_popcount64(uint64 word)
285+
{
286+
return pg_popcount64_slow(word);
287+
}
288+
289+
#endif/* !TRY_POPCNT_FAST */
294290

295291
/*
296292
* pg_popcount

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp