|
1 | 1 | /*
|
2 |
| - *Naive CPU SIMD features detection. |
| 2 | + *Python CPU SIMD features detection. |
3 | 3 | *
|
4 |
| - * SeeModules/black2module.c. |
| 4 | + * See https://en.wikipedia.org/wiki/CPUID for details. |
5 | 5 | */
|
6 | 6 |
|
7 | 7 | #include"Python.h"
|
8 | 8 | #include"pycore_cpuinfo.h"
|
9 | 9 |
|
10 |
| -#include<stdbool.h> |
/*
 * For simplicity, CPUID-based SIMD detection is only enabled on x86-64
 * (GCC-compatible compilers and MSVC), even though ARM NEON and POWER
 * could be supported as well.
 *
 * CPUID_REG(ARG) wraps the name of an output-register parameter: it expands
 * to ARG where CPUID can be queried and to Py_UNUSED(ARG) everywhere else,
 * where the parameter is never read.
 */
#if defined(__x86_64__) && defined(__GNUC__)
#  include <cpuid.h>
#  define CPUID_REG(ARG)    ARG
#elif defined(_M_X64)
#  include <intrin.h>
#  define CPUID_REG(ARG)    ARG
#else
#  define CPUID_REG(ARG)    Py_UNUSED(ARG)
#endif
|
17 | 24 |
|
18 | 25 | // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
|
|
24 | 31 | # undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
|
25 | 32 | #endif
|
26 | 33 |
|
/*
 * The macros below describe masks to apply on CPUID output registers.
 *
 * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where
 *
 * - REGISTER is either EBX, ECX or EDX,
 * - PAGE is either 1 or 7, depending on the CPUID input EAX, and
 * - FEATURE is a SIMD instruction set.
 */
#define EDX1_SSE            (1 << 25)   // sse, EDX, page 1, bit 25
#define EDX1_SSE2           (1 << 26)   // sse2, EDX, page 1, bit 26
// SSE3 is reported in bit 0 of ECX; bit 9 of ECX is SSSE3, a different set.
#define ECX1_SSE3           (1 << 0)    // sse3, ECX, page 1, bit 0
|
|
#define EBX7_AVX2           (1 << 5)    // avx2, EBX, page 7, bit 5
#define ECX7_AVX512_VBMI    (1 << 1)    // avx512-vbmi, ECX, page 7, bit 1
|
35 | 51 |
|
36 |
| -void |
37 |
| -_Py_detect_cpu_simd_features(_py_cpu_simd_flags*flags) |
38 |
| -{ |
39 |
| -if (flags->done) { |
40 |
| -return; |
41 |
| - } |
/*
 * Evaluate to 1 if at least one bit of MASK is set in REGISTER, and to 0
 * otherwise.
 *
 * The whole expansion is parenthesized so that the macro can safely be
 * embedded in a larger expression (e.g., '2 * CHECK_CPUID_REGISTER(r, m)').
 */
#define CHECK_CPUID_REGISTER(REGISTER, MASK) \
    ((((REGISTER) & (MASK)) == 0) ? 0 : 1)
42 | 53 |
|
43 |
| -inteax1=0,ebx1=0,ecx1=0,edx1=0; |
44 |
| -inteax7=0,ebx7=0,ecx7=0,edx7=0; |
/*
 * Indicate whether the CPUID input EAX=1 may be needed to
 * detect SIMD basic features (e.g., SSE).
 *
 * The macro is defined as soon as at least one of the basic
 * instruction sets listed below can be compiled.
 */
#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS)          \
    || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS)      \
    || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS)      \
    || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS)    \
    || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS)    \
    || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS)
#  define MAY_DETECT_CPUID_SIMD_FEATURES
#endif
| 66 | + |
/*
 * Indicate whether the CPUID input EAX=7 may be needed to
 * detect SIMD extended features (e.g., AVX2 or AVX-512).
 *
 * The macro is defined as soon as at least one of the extended
 * instruction sets listed below can be compiled.
 */
#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS)             \
    || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS)
#  define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
#endif
| 75 | + |
| 76 | +staticinlinevoid |
| 77 | +get_cpuid_info(int32_tlevel/* input eax */, |
| 78 | +int32_tcount/* input ecx */, |
| 79 | +int32_t*CPUID_REG(eax), |
| 80 | +int32_t*CPUID_REG(ebx), |
| 81 | +int32_t*CPUID_REG(ecx), |
| 82 | +int32_t*CPUID_REG(edx)) |
| 83 | +{ |
45 | 84 | #if defined(__x86_64__)&& defined(__GNUC__)
|
46 |
| -__cpuid_count(1,0,eax1,ebx1,ecx1,edx1); |
47 |
| -__cpuid_count(7,0,eax7,ebx7,ecx7,edx7); |
| 85 | +__cpuid_count(level,count,*eax,*ebx,*ecx,*edx); |
48 | 86 | #elif defined(_M_X64)
|
49 |
| -intinfo1[4]= {0}; |
50 |
| -__cpuidex(info1,1,0); |
51 |
| -eax1=info1[0]; |
52 |
| -ebx1=info1[1]; |
53 |
| -ecx1=info1[2]; |
54 |
| -edx1=info1[3]; |
55 |
| - |
56 |
| -intinfo7[4]= {0}; |
57 |
| -__cpuidex(info7,7,0); |
58 |
| -eax7=info7[0]; |
59 |
| -ebx7=info7[1]; |
60 |
| -ecx7=info7[2]; |
61 |
| -edx7=info7[3]; |
62 |
| -#else |
63 |
| -// use (void) expressions to avoid warnings |
64 |
| - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; |
65 |
| - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; |
| 87 | +int32_tinfo[4]= {0}; |
| 88 | +__cpuidex(info,page,count); |
| 89 | +*eax=info[0]; |
| 90 | +*ebx=info[1]; |
| 91 | +*ecx=info[2]; |
| 92 | +*edx=info[3]; |
66 | 93 | #endif
|
| 94 | +} |
67 | 95 |
|
| 96 | +/* Processor Info and Feature Bits (EAX=1, ECX=0). */ |
| 97 | +staticinlinevoid |
| 98 | +detect_cpu_simd_features(py_cpu_simd_flags*flags) |
| 99 | +{ |
| 100 | +int32_teax=0,ebx=0,ecx=0,edx=0; |
| 101 | +get_cpuid_info(1,0,&eax,&ebx,&ecx,&edx); |
68 | 102 | #ifdefCAN_COMPILE_SIMD_SSE_INSTRUCTIONS
|
69 |
| -flags->sse= (edx1&EDX1_SSE)!=0; |
70 |
| -#else |
71 |
| -flags->sse= false; |
| 103 | +flags->sse=CHECK_CPUID_REGISTER(edx,EDX1_SSE); |
72 | 104 | #endif
|
73 | 105 | #ifdefCAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
|
74 |
| -flags->sse2= (edx1&EDX1_SSE2)!=0; |
75 |
| -#else |
76 |
| -flags->sse2= false; |
| 106 | +flags->sse2=CHECK_CPUID_REGISTER(edx,EDX1_SSE2); |
77 | 107 | #endif
|
78 | 108 | #ifdefCAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
|
79 |
| -flags->sse3= (ecx1&ECX1_SSE3)!=0; |
80 |
| -#else |
| 109 | +flags->sse3=CHECK_CPUID_REGISTER(ecx,ECX1_SSE3); |
81 | 110 | #endif
|
82 |
| -flags->sse3= false; |
83 | 111 | #ifdefCAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
|
84 |
| -flags->sse41= (ecx1&ECX1_SSE4_1)!=0; |
85 |
| -#else |
86 |
| -flags->sse41= false; |
| 112 | +flags->sse41=CHECK_CPUID_REGISTER(ecx,ECX1_SSE4_1); |
87 | 113 | #endif
|
88 | 114 | #ifdefCAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
|
89 |
| -flags->sse42= (ecx1&ECX1_SSE4_2)!=0; |
90 |
| -#else |
91 |
| -flags->sse42= false; |
| 115 | +flags->sse42=CHECK_CPUID_REGISTER(ecx,ECX1_SSE4_2); |
92 | 116 | #endif
|
93 | 117 | #ifdefCAN_COMPILE_SIMD_AVX_INSTRUCTIONS
|
94 |
| -flags->avx= (ecx1&ECX1_AVX)!=0; |
95 |
| -#else |
96 |
| -flags->avx= false; |
| 118 | +flags->avx=CHECK_CPUID_REGISTER(ecx,ECX1_AVX); |
97 | 119 | #endif
|
| 120 | +} |
| 121 | + |
| 122 | +/* Extended feature bits (EAX=7, ECX=0). */ |
| 123 | +staticinlinevoid |
| 124 | +detect_cpu_simd_extended_features(py_cpu_simd_flags*flags) |
| 125 | +{ |
| 126 | +int32_teax=0,ebx=0,ecx=0,edx=0; |
| 127 | +get_cpuid_info(7,0,&eax,&ebx,&ecx,&edx); |
98 | 128 | #ifdefCAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
|
99 |
| -flags->avx2= (ebx7&EBX7_AVX2)!=0; |
100 |
| -#else |
101 |
| -flags->avx2= false; |
| 129 | +flags->avx2=CHECK_CPUID_REGISTER(ebx,EBX7_AVX2); |
102 | 130 | #endif
|
103 | 131 | #ifdefCAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
|
104 |
| -flags->avx512vbmi= (ecx7&ECX7_AVX512_VBMI)!=0; |
105 |
| -#else |
106 |
| -flags->avx512vbmi= false; |
| 132 | +flags->avx512vbmi=CHECK_CPUID_REGISTER(ecx,ECX7_AVX512_VBMI); |
107 | 133 | #endif
|
| 134 | +} |
108 | 135 |
|
109 |
| -flags->done= true; |
| 136 | +void |
| 137 | +_Py_detect_cpu_simd_features(py_cpu_simd_flags*flags) |
| 138 | +{ |
| 139 | +if (flags->done) { |
| 140 | +return; |
| 141 | + } |
| 142 | +#ifdefMAY_DETECT_CPUID_SIMD_FEATURES |
| 143 | +detect_cpu_simd_features(flags); |
| 144 | +#else |
| 145 | +flags->sse=flags->sse2=flags->sse3=flags->sse41=flags->sse42=0; |
| 146 | +flags->avx=0; |
| 147 | +#endif |
| 148 | +#ifdefMAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES |
| 149 | +detect_cpu_simd_extended_features(flags); |
| 150 | +#else |
| 151 | +flags->avx2=flags->avx512vbmi=0; |
| 152 | +#endif |
| 153 | +flags->done=1; |
110 | 154 | }
|