Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 598e011

Browse files
Fix code for probing availability of AVX-512.
This commit fixes a few things:

* Instead of checking for CPU support of the "xsave" extension, we need to check for OS support of XGETBV instructions via the "osxsave" flag.
* We must check that additional XCR0 bits are set to be sure the ZMM registers are fully enabled.
* We should use the recommended ordering of steps. Specifically, we need to check that the ZMM registers are enabled prior to checking for AVX-512 via CPUID.

In passing, split this code into separate functions to improve readability.

Reported-by: Andrew Kane
Reviewed-by: Akash Shankaran, Raghuveer Devulapalli
Discussion: https://postgr.es/m/20240418024459.GA3385227%40nathanxps13
1 parent bb3ca23, commit 598e011

File tree

1 file changed

+47
-33
lines changed

1 file changed

+47
-33
lines changed

‎src/port/pg_popcount_avx512_choose.c

Lines changed: 47 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -34,55 +34,69 @@
3434
#ifdef TRY_POPCNT_FAST
3535

3636
/*
37-
* Returns true if the CPU supports the instructions required for the AVX-512
38-
* pg_popcount() implementation.
37+
* Does CPUID say there's support for XSAVE instructions?
3938
*/
40-
bool
41-
pg_popcount_avx512_available(void)
39+
static inline bool
40+
xsave_available(void)
4241
{
4342
unsigned int exx[4] = {0, 0, 0, 0};
4443

45-
/* Does CPUID say there's support for AVX-512 popcount instructions? */
46-
#if defined(HAVE__GET_CPUID_COUNT)
47-
__get_cpuid_count(7,0,&exx[0],&exx[1],&exx[2],&exx[3]);
48-
#elif defined(HAVE__CPUIDEX)
49-
__cpuidex(exx,7,0);
50-
#else
51-
#error cpuid instruction not available
52-
#endif
53-
if ((exx[2]& (1 <<14))==0)/* avx512-vpopcntdq */
54-
return false;
55-
56-
/* Does CPUID say there's support for AVX-512 byte and word instructions? */
57-
memset(exx,0,sizeof(exx));
58-
#if defined(HAVE__GET_CPUID_COUNT)
59-
__get_cpuid_count(7,0,&exx[0],&exx[1],&exx[2],&exx[3]);
60-
#elif defined(HAVE__CPUIDEX)
61-
__cpuidex(exx,7,0);
62-
#else
63-
#error cpuid instruction not available
64-
#endif
65-
if ((exx[1]& (1 <<30))==0)/* avx512-bw */
66-
return false;
67-
68-
/* Does CPUID say there's support for XSAVE instructions? */
69-
memset(exx,0,sizeof(exx));
7044
#if defined(HAVE__GET_CPUID)
7145
__get_cpuid(1,&exx[0],&exx[1],&exx[2],&exx[3]);
7246
#elif defined(HAVE__CPUID)
7347
__cpuid(exx,1);
7448
#else
7549
#error cpuid instruction not available
7650
#endif
77-
if ((exx[2]& (1 <<26))==0)/*xsave */
78-
return false;
51+
return (exx[2]& (1 <<27))!=0;/*osxsave */
52+
}
7953

80-
/* Does XGETBV say the ZMM registers are enabled? */
54+
/*
55+
* Does XGETBV say the ZMM registers are enabled?
56+
*
57+
* NB: Caller is responsible for verifying that xsave_available() returns true
58+
* before calling this.
59+
*/
60+
static inline bool
61+
zmm_regs_available(void)
62+
{
8163
#ifdef HAVE_XSAVE_INTRINSICS
82-
return (_xgetbv(0)&0xe0)!=0;
64+
return (_xgetbv(0)&0xe6)==0xe6;
8365
#else
8466
return false;
8567
#endif
8668
}
8769

70+
/*
71+
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
72+
* instructions?
73+
*/
74+
static inline bool
75+
avx512_popcnt_available(void)
76+
{
77+
unsigned int exx[4] = {0, 0, 0, 0};
78+
79+
#if defined(HAVE__GET_CPUID_COUNT)
80+
__get_cpuid_count(7,0,&exx[0],&exx[1],&exx[2],&exx[3]);
81+
#elif defined(HAVE__CPUIDEX)
82+
__cpuidex(exx,7,0);
83+
#else
84+
#error cpuid instruction not available
85+
#endif
86+
return (exx[2]& (1 <<14))!=0&&/* avx512-vpopcntdq */
87+
(exx[1]& (1 <<30))!=0;/* avx512-bw */
88+
}
89+
90+
/*
91+
* Returns true if the CPU supports the instructions required for the AVX-512
92+
* pg_popcount() implementation.
93+
*/
94+
bool
95+
pg_popcount_avx512_available(void)
96+
{
97+
return xsave_available() &&
98+
zmm_regs_available()&&
99+
avx512_popcnt_available();
100+
}
101+
88102
#endif /* TRY_POPCNT_FAST */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp