Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc33fb91

Browse files
committed
make the interface friendlier for future adjustments
1 parent5006686 commitc33fb91

File tree

2 files changed

+115
-61
lines changed

2 files changed

+115
-61
lines changed

‎Include/internal/pycore_cpuinfo.h

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,25 @@ extern "C" {
99
# error "this header requires Py_BUILD_CORE define"
1010
#endif
1111

12-
#include<stdbool.h>
13-
1412
typedefstruct {
15-
boolsse,sse2,sse3,sse41,sse42,avx,avx2,avx512vbmi;
16-
booldone;
17-
}_py_cpu_simd_flags;
13+
/* Streaming SIMD Extensions */
14+
uint8_tsse:1;
15+
uint8_tsse2:1;
16+
uint8_tsse3:1;
17+
uint8_tsse41:1;// SSE4.1
18+
uint8_tsse42:1;// SSE4.2
19+
20+
/* Advanced Vector Extensions */
21+
uint8_tavx:1;
22+
uint8_tavx2:1;
23+
uint8_tavx512vbmi:1;// AVX-512 Vector Byte Manipulation Instructions
24+
25+
uint8_tdone;// indicate whether the structure was filled or not
26+
}py_cpu_simd_flags;
1827

28+
/* Detect the available SIMD features on this machine. */
1929
externvoid
20-
_Py_detect_cpu_simd_features(_py_cpu_simd_flags*flags);
30+
_Py_detect_cpu_simd_features(py_cpu_simd_flags*flags);
2131

2232
#ifdef__cplusplus
2333
}

‎Python/cpuinfo.c

Lines changed: 99 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
/*
2-
*Naive CPU SIMD features detection.
2+
*Python CPU SIMD features detection.
33
*
4-
* SeeModules/black2module.c.
4+
* Seehttps://en.wikipedia.org/wiki/CPUID for details.
55
*/
66

77
#include"Python.h"
88
#include"pycore_cpuinfo.h"
99

10-
#include<stdbool.h>
10+
#defineCPUID_REG(ARG) ARG
1111

12+
/*
13+
* For simplicity, we only enable SIMD instructions for x86-64 CPUs,
14+
* even though we could support ARM NEON and POWER.
15+
*/
1216
#if defined(__x86_64__)&& defined(__GNUC__)
1317
# include<cpuid.h>
1418
#elif defined(_M_X64)
1519
# include<intrin.h>
20+
#else
21+
# undef CPUID_REG
22+
# defineCPUID_REG(ARG) Py_UNUSED(ARG)
1623
#endif
1724

1825
// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
@@ -24,6 +31,15 @@
2431
# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
2532
#endif
2633

34+
/*
35+
* The macros below describe masks to apply on CPUID output registers.
36+
*
37+
* Each macro is of the form [REGISTER][PAGE]_[FEATURE] where
38+
*
39+
* - REGISTER is either EBX, ECX or EDX,
40+
* - PAGE is either 1 or 7 (the CPUID input EAX value), and
41+
* - FEATURE is an SIMD instruction set.
42+
*/
2743
#defineEDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25
2844
#defineEDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26
2945
#define ECX1_SSE3 (1 << 0) // sse3, ECX, page 1, bit 0 (bit 9 is SSSE3, not SSE3)
@@ -33,78 +49,106 @@
3349
#defineEBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5
3450
#defineECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1
3551

36-
void
37-
_Py_detect_cpu_simd_features(_py_cpu_simd_flags*flags)
38-
{
39-
if (flags->done) {
40-
return;
41-
}
52+
/* Evaluate to 1 if any bit of MASK is set in REGISTER, and to 0 otherwise.
 * The whole expansion is parenthesized so that it can be safely embedded
 * in a larger expression. */
#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((((REGISTER) & (MASK)) == 0) ? 0 : 1)
4253

43-
inteax1=0,ebx1=0,ecx1=0,edx1=0;
44-
inteax7=0,ebx7=0,ecx7=0,edx7=0;
54+
/*
55+
* Indicate whether the CPUID input EAX=1 may be needed to
56+
* detect SIMD basic features (e.g., SSE).
57+
*/
58+
#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \
59+
|| defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \
60+
|| defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \
61+
|| defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \
62+
|| defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \
63+
|| defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS)
64+
# defineMAY_DETECT_CPUID_SIMD_FEATURES
65+
#endif
66+
67+
/*
68+
* Indicate whether the CPUID input EAX=7 may be needed to
69+
* detect SIMD extended features (e.g., AVX2 or AVX-512).
70+
*/
71+
#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \
72+
|| defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS)
73+
# defineMAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
74+
#endif
75+
76+
staticinlinevoid
77+
get_cpuid_info(int32_tlevel/* input eax */,
78+
int32_tcount/* input ecx */,
79+
int32_t*CPUID_REG(eax),
80+
int32_t*CPUID_REG(ebx),
81+
int32_t*CPUID_REG(ecx),
82+
int32_t*CPUID_REG(edx))
83+
{
4584
#if defined(__x86_64__)&& defined(__GNUC__)
46-
__cpuid_count(1,0,eax1,ebx1,ecx1,edx1);
47-
__cpuid_count(7,0,eax7,ebx7,ecx7,edx7);
85+
__cpuid_count(level,count,*eax,*ebx,*ecx,*edx);
4886
#elif defined(_M_X64)
49-
intinfo1[4]= {0};
50-
__cpuidex(info1,1,0);
51-
eax1=info1[0];
52-
ebx1=info1[1];
53-
ecx1=info1[2];
54-
edx1=info1[3];
55-
56-
intinfo7[4]= {0};
57-
__cpuidex(info7,7,0);
58-
eax7=info7[0];
59-
ebx7=info7[1];
60-
ecx7=info7[2];
61-
edx7=info7[3];
62-
#else
63-
// use (void) expressions to avoid warnings
64-
(void)eax1; (void)ebx1; (void)ecx1; (void)edx1;
65-
(void)eax7; (void)ebx7; (void)ecx7; (void)edx7;
87+
int32_tinfo[4]= {0};
88+
__cpuidex(info, level, count);  // 'level' is the input EAX; no 'page' identifier is declared here
89+
*eax=info[0];
90+
*ebx=info[1];
91+
*ecx=info[2];
92+
*edx=info[3];
6693
#endif
94+
}
6795

96+
/* Processor Info and Feature Bits (EAX=1, ECX=0). */
97+
staticinlinevoid
98+
detect_cpu_simd_features(py_cpu_simd_flags*flags)
99+
{
100+
int32_teax=0,ebx=0,ecx=0,edx=0;
101+
get_cpuid_info(1,0,&eax,&ebx,&ecx,&edx);
68102
#ifdefCAN_COMPILE_SIMD_SSE_INSTRUCTIONS
69-
flags->sse= (edx1&EDX1_SSE)!=0;
70-
#else
71-
flags->sse= false;
103+
flags->sse=CHECK_CPUID_REGISTER(edx,EDX1_SSE);
72104
#endif
73105
#ifdefCAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
74-
flags->sse2= (edx1&EDX1_SSE2)!=0;
75-
#else
76-
flags->sse2= false;
106+
flags->sse2=CHECK_CPUID_REGISTER(edx,EDX1_SSE2);
77107
#endif
78108
#ifdefCAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
79-
flags->sse3= (ecx1&ECX1_SSE3)!=0;
80-
#else
109+
flags->sse3=CHECK_CPUID_REGISTER(ecx,ECX1_SSE3);
81110
#endif
82-
flags->sse3= false;
83111
#ifdefCAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
84-
flags->sse41= (ecx1&ECX1_SSE4_1)!=0;
85-
#else
86-
flags->sse41= false;
112+
flags->sse41=CHECK_CPUID_REGISTER(ecx,ECX1_SSE4_1);
87113
#endif
88114
#ifdefCAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
89-
flags->sse42= (ecx1&ECX1_SSE4_2)!=0;
90-
#else
91-
flags->sse42= false;
115+
flags->sse42=CHECK_CPUID_REGISTER(ecx,ECX1_SSE4_2);
92116
#endif
93117
#ifdefCAN_COMPILE_SIMD_AVX_INSTRUCTIONS
94-
flags->avx= (ecx1&ECX1_AVX)!=0;
95-
#else
96-
flags->avx= false;
118+
flags->avx=CHECK_CPUID_REGISTER(ecx,ECX1_AVX);
97119
#endif
120+
}
121+
122+
/* Extended feature bits (EAX=7, ECX=0). */
123+
staticinlinevoid
124+
detect_cpu_simd_extended_features(py_cpu_simd_flags*flags)
125+
{
126+
int32_teax=0,ebx=0,ecx=0,edx=0;
127+
get_cpuid_info(7,0,&eax,&ebx,&ecx,&edx);
98128
#ifdefCAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
99-
flags->avx2= (ebx7&EBX7_AVX2)!=0;
100-
#else
101-
flags->avx2= false;
129+
flags->avx2=CHECK_CPUID_REGISTER(ebx,EBX7_AVX2);
102130
#endif
103131
#ifdefCAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
104-
flags->avx512vbmi= (ecx7&ECX7_AVX512_VBMI)!=0;
105-
#else
106-
flags->avx512vbmi= false;
132+
flags->avx512vbmi=CHECK_CPUID_REGISTER(ecx,ECX7_AVX512_VBMI);
107133
#endif
134+
}
108135

109-
flags->done= true;
136+
void
137+
_Py_detect_cpu_simd_features(py_cpu_simd_flags*flags)
138+
{
139+
if (flags->done) {
140+
return;
141+
}
142+
#ifdefMAY_DETECT_CPUID_SIMD_FEATURES
143+
detect_cpu_simd_features(flags);
144+
#else
145+
flags->sse=flags->sse2=flags->sse3=flags->sse41=flags->sse42=0;
146+
flags->avx=0;
147+
#endif
148+
#ifdefMAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
149+
detect_cpu_simd_extended_features(flags);
150+
#else
151+
flags->avx2=flags->avx512vbmi=0;
152+
#endif
153+
flags->done=1;
110154
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp