Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f1877d1

Browse files
committed
Convert unary_fp_le to highway.
1 parent ab01d8f commit f1877d1

File tree

6 files changed

+507
-557
lines changed

6 files changed

+507
-557
lines changed

‎numpy/_core/meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1021,12 +1021,13 @@ foreach gen_mtargets : [
10211021
],
10221022
[
10231023
'loops_unary_fp_le.dispatch.h',
1024-
src_file.process('src/umath/loops_unary_fp_le.dispatch.c.src'),
1024+
'src/umath/loops_unary_fp_le.dispatch.cpp',
10251025
[
10261026
SSE41, SSE2,
10271027
VSX2,
10281028
ASIMD, NEON,
10291029
LSX,
1030+
RVV,
10301031
]
10311032
],
10321033
[

‎numpy/_core/src/common/simd/simd.hpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#ifndef NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
2+
#define NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
3+
4+
/**
5+
* This header provides a thin wrapper over Google's Highway SIMD library.
6+
*
7+
* The wrapper aims to simplify the SIMD interface of Google's Highway by
8+
* getting rid of its class tags and using lane types directly, which can be deduced
9+
* from the args in most cases.
10+
*/
11+
/**
12+
* Since `NPY_SIMD` is limited to the NumPy C universal intrinsics,
13+
* `NPY_SIMDX` is defined to indicate the SIMD availability for Google's Highway
14+
* C++ code.
15+
*
16+
* Highway SIMD is only available when optimization is enabled.
17+
* When NPY_DISABLE_OPTIMIZATION is defined, SIMD operations are disabled
18+
* and the code falls back to scalar implementations.
19+
*/
20+
#ifndef NPY_DISABLE_OPTIMIZATION
21+
#include<hwy/highway.h>
22+
23+
/**
24+
* We avoid using Highway scalar operations for the following reasons:
25+
* 1. We already provide kernels for scalar operations, so falling back to
26+
* the NumPy implementation is more appropriate. Compilers can often
27+
* optimize these better since they rely on standard libraries.
28+
* 2. Not all Highway intrinsics are fully supported in scalar mode.
29+
*
30+
* Therefore, we only enable SIMD when the Highway target is not scalar.
31+
*/
32+
#define NPY_SIMDX (HWY_TARGET != HWY_SCALAR)
33+
34+
// Indicates if the SIMD operations are available for float16.
35+
#define NPY_SIMDX_F16 (NPY_SIMDX && HWY_HAVE_FLOAT16)
36+
// Note: Highway requires SIMD extensions with native float32 support, so we don't need
37+
// to check for it.
38+
39+
// Indicates if the SIMD operations are available for float64.
40+
#define NPY_SIMDX_F64 (NPY_SIMDX && HWY_HAVE_FLOAT64)
41+
42+
// Indicates if the SIMD floating-point operations natively support FMA.
43+
#define NPY_SIMDX_FMA (NPY_SIMDX && HWY_NATIVE_FMA)
44+
45+
#else
46+
#define NPY_SIMDX 0
47+
#define NPY_SIMDX_F16 0
48+
#define NPY_SIMDX_F64 0
49+
#define NPY_SIMDX_FMA 0
50+
#endif
51+
52+
namespace np {
53+
54+
/// Represents the max SIMD width supported by the platform.
55+
namespace simd {
56+
#if NPY_SIMDX
57+
/// The highway namespace alias.
58+
/// We cannot import all the symbols from the HWY_NAMESPACE because it will
59+
/// conflict with the existing symbols in the numpy namespace.
60+
namespace hn = hwy::HWY_NAMESPACE;
61+
// internally used by the template header
62+
template<typename TLane>
63+
using _Tag = hn::ScalableTag<TLane>;
64+
#endif
65+
#include"simd.inc.hpp"
66+
}// namespace simd
67+
68+
/// Represents the 128-bit SIMD width.
69+
namespace simd128 {
70+
#if NPY_SIMDX
71+
namespace hn = hwy::HWY_NAMESPACE;
72+
template<typename TLane>
73+
using _Tag = hn::Full128<TLane>;
74+
#endif
75+
#include"simd.inc.hpp"
76+
}// namespace simd128
77+
78+
}// namespace np
79+
80+
#endif// NUMPY__CORE_SRC_COMMON_SIMD_SIMD_HPP_
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#ifndef NPY_SIMDX
2+
#error "This is not a standalone header. Include simd.hpp instead."
3+
#define NPY_SIMDX 1 // Prevent editors from graying out the happy branch
4+
#endif
5+
6+
// Using anonymous namespace instead of inline to ensure each translation unit
7+
// gets its own copy of constants based on local compilation flags
8+
namespace {
9+
10+
// NOTE: This file is included by simd.hpp multiple times with different namespaces
11+
// so avoid including any headers here
12+
13+
/**
14+
* Determines whether the specified lane type is supported by the SIMD extension.
15+
* Always defined as false when SIMD is not enabled, so it can be used in SFINAE.
16+
*
17+
* @tparam TLane The lane type to check for support.
18+
*/
19+
template<typename TLane>
20+
constexpr bool kSupportLane = NPY_SIMDX != 0;
21+
22+
#if NPY_SIMDX
23+
// Define lane type support based on Highway capabilities
24+
template<>
25+
constexpr bool kSupportLane<hwy::float16_t> = HWY_HAVE_FLOAT16 != 0;
26+
template<>
27+
constexpr bool kSupportLane<double> = HWY_HAVE_FLOAT64 != 0;
28+
template<>
29+
constexpr bool kSupportLane<long double> =
30+
HWY_HAVE_FLOAT64 != 0 && sizeof(long double) == sizeof(double);
31+
32+
/// Maximum number of lanes supported by the SIMD extension for the specified lane type.
33+
template<typename TLane>
34+
constexpr size_t kMaxLanes = HWY_MAX_LANES_D(_Tag<TLane>);
35+
36+
/// Represents an N-lane vector based on the specified lane type.
37+
/// @tparam TLane The scalar type for each vector lane
38+
template<typename TLane>
39+
using Vec = hn::Vec<_Tag<TLane>>;
40+
41+
/// Represents a mask vector with boolean values or as a bitmask.
42+
/// @tparam TLane The scalar type the mask corresponds to
43+
template<typename TLane>
44+
using Mask = hn::Mask<_Tag<TLane>>;
45+
46+
/// Unaligned load of a vector from memory.
47+
template<typename TLane>
48+
HWY_API Vec<TLane>
49+
LoadU(const TLane *ptr)
50+
{
51+
return hn::LoadU(_Tag<TLane>(), ptr);
52+
}
53+
54+
/// Unaligned store of a vector to memory.
55+
template<typename TLane>
56+
HWY_API void
57+
StoreU(const Vec<TLane> &a, TLane *ptr)
58+
{
59+
hn::StoreU(a, _Tag<TLane>(), ptr);
60+
}
61+
62+
/// Returns the number of vector lanes based on the lane type.
63+
template<typename TLane>
64+
HWY_API HWY_LANES_CONSTEXPR size_t
65+
Lanes(TLane tag =0)
66+
{
67+
return hn::Lanes(_Tag<TLane>());
68+
}
69+
70+
/// Returns an uninitialized N-lane vector.
71+
template<typename TLane>
72+
HWY_API Vec<TLane>
73+
Undefined(TLane tag =0)
74+
{
75+
return hn::Undefined(_Tag<TLane>());
76+
}
77+
78+
/// Returns N-lane vector with all lanes equal to zero.
79+
template<typename TLane>
80+
HWY_API Vec<TLane>
81+
Zero(TLane tag =0)
82+
{
83+
return hn::Zero(_Tag<TLane>());
84+
}
85+
86+
/// Returns N-lane vector with all lanes equal to the given value of type `TLane`.
87+
template<typename TLane>
88+
HWY_API Vec<TLane>
89+
Set(TLane val)
90+
{
91+
return hn::Set(_Tag<TLane>(), val);
92+
}
93+
94+
/// Converts a mask to a vector based on the specified lane type.
95+
template<typename TLane,typename TMask>
96+
HWY_API Vec<TLane>
97+
VecFromMask(const TMask &m)
98+
{
99+
return hn::VecFromMask(_Tag<TLane>(), m);
100+
}
101+
102+
/// Convert (Reinterpret) an N-lane vector to a different type without modifying the
103+
/// underlying data.
104+
template<typename TLaneTo,typename TVec>
105+
HWY_API Vec<TLaneTo>
106+
BitCast(const TVec &v)
107+
{
108+
return hn::BitCast(_Tag<TLaneTo>(), v);
109+
}
110+
111+
// Import common Highway intrinsics
112+
using hn::Abs;
113+
using hn::Add;
114+
using hn::And;
115+
using hn::AndNot;
116+
using hn::Div;
117+
using hn::Eq;
118+
using hn::Ge;
119+
using hn::Gt;
120+
using hn::Le;
121+
using hn::Lt;
122+
using hn::Max;
123+
using hn::Min;
124+
using hn::Mul;
125+
using hn::Or;
126+
using hn::Sqrt;
127+
using hn::Sub;
128+
using hn::Xor;
129+
130+
#endif// NPY_SIMDX
131+
132+
}// namespace anonymous

‎numpy/_core/src/highway

Submodule highway updated 113 files

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp