Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0f96754

Browse files
committed
use rotl/rotr in 8x Montgomery mul
1 parent dd4320f, commit 0f96754

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

‎cp-algo/util/simd.hpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,12 @@ namespace cp_algo {
4141
[[gnu::always_inline]] inline u64x4 low32(u64x4 x) {
4242
return x & uint32_t(-1);
4343
}
44+
[[gnu::always_inline]] inline auto rotr(auto x) {
45+
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 1, 2, 3, 0, 5, 6, 7, 4));
46+
}
47+
[[gnu::always_inline]] inline auto rotl(auto x) {
48+
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 3, 0, 1, 2, 7, 4, 5, 6));
49+
}
4450

4551
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
4652
#ifdef __AVX2__
@@ -50,7 +56,7 @@ namespace cp_algo {
5056
auto x_ninv = x * imod;
5157
x += low32(x_ninv) * mod;
5258
#endif
53-
return x >> 32;
59+
return rotr(x);
5460
}
5561

5662
[[gnu::always_inline]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
@@ -60,16 +66,10 @@ namespace cp_algo {
6066
return montgomery_reduce(low32(x) * low32(y), mod, imod);
6167
#endif
6268
}
63-
6469
[[gnu::always_inline]] inline u32x8 montgomery_mul(u32x8 x, u32x8 y, uint32_t mod, uint32_t imod) {
65-
auto x0246 = u64x4(x);
66-
auto y0246 = u64x4(y);
67-
auto x1357 = u64x4(x) >> 32;
68-
auto y1357 = u64x4(y) >> 32;
69-
return u32x8(montgomery_mul(x0246, y0246, mod, imod)) |
70-
u32x8(montgomery_mul(x1357, y1357, mod, imod) << 32);
70+
return u32x8(montgomery_mul(u64x4(x), u64x4(y), mod, imod)) |
71+
u32x8(rotl(montgomery_mul(u64x4(rotr(x)), u64x4(rotr(y)), mod, imod)));
7172
}
72-
7373
[[gnu::always_inline]] inline dx4 rotate_right(dx4 x) {
7474
static constexpr u64x4 shuffler = {3, 0, 1, 2};
7575
return __builtin_shuffle(x, shuffler);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp