Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9f225e9

Browse files
committed
Introduce helper SIMD functions for small byte arrays
vector8_min - helper for emulating ">=" semantics. vector8_highbit_mask - used to turn the result of a vector comparison into a bitmask. Masahiko Sawada. Reviewed by Nathan Bossart, with additional adjustments by me. Discussion: https://postgr.es/m/CAFBsxsHbBm_M22gLBO%2BAZT4mfMq3L_oX3wdKZxjeNnT7fHsYMQ%40mail.gmail.com
1 parent 60c0782, commit 9f225e9

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

‎src/include/port/simd.h

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c);
7979
staticinlineboolvector8_is_highbit_set(constVector8v);
8080
#ifndefUSE_NO_SIMD
8181
staticinlineboolvector32_is_highbit_set(constVector32v);
82+
staticinlineuint32vector8_highbit_mask(constVector8v);
8283
#endif
8384

8485
/* arithmetic operations */
@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
9697
*/
9798
#ifndefUSE_NO_SIMD
9899
staticinlineVector8vector8_eq(constVector8v1,constVector8v2);
100+
staticinlineVector8vector8_min(constVector8v1,constVector8v2);
99101
staticinlineVector32vector32_eq(constVector32v1,constVector32v2);
100102
#endif
101103

@@ -299,6 +301,36 @@ vector32_is_highbit_set(const Vector32 v)
299301
}
300302
#endif/* ! USE_NO_SIMD */
301303

304+
/*
305+
* Return a bitmask formed from the high-bit of each element.
306+
*/
307+
#ifndefUSE_NO_SIMD
308+
staticinlineuint32
309+
vector8_highbit_mask(constVector8v)
310+
{
311+
#ifdefUSE_SSE2
312+
return (uint32)_mm_movemask_epi8(v);
313+
#elif defined(USE_NEON)
314+
/*
315+
* Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but that
316+
* returns a uint64, making it inconvenient to combine mask values from
317+
* multiple vectors.
318+
*/
319+
staticconstuint8mask[16]= {
320+
1 <<0,1 <<1,1 <<2,1 <<3,
321+
1 <<4,1 <<5,1 <<6,1 <<7,
322+
1 <<0,1 <<1,1 <<2,1 <<3,
323+
1 <<4,1 <<5,1 <<6,1 <<7,
324+
};
325+
326+
uint8x16_tmasked=vandq_u8(vld1q_u8(mask), (uint8x16_t)vshrq_n_s8(v,7));
327+
uint8x16_tmaskedhi=vextq_u8(masked,masked,8);
328+
329+
return (uint32)vaddvq_u16((uint16x8_t)vzip1q_u8(masked,maskedhi));
330+
#endif
331+
}
332+
#endif/* ! USE_NO_SIMD */
333+
302334
/*
303335
* Return the bitwise OR of the inputs
304336
*/
@@ -372,4 +404,19 @@ vector32_eq(const Vector32 v1, const Vector32 v2)
372404
}
373405
#endif/* ! USE_NO_SIMD */
374406

407+
/*
408+
* Given two vectors, return a vector with the minimum element of each.
409+
*/
410+
#ifndefUSE_NO_SIMD
411+
staticinlineVector8
412+
vector8_min(constVector8v1,constVector8v2)
413+
{
414+
#ifdefUSE_SSE2
415+
return_mm_min_epu8(v1,v2);
416+
#elif defined(USE_NEON)
417+
returnvminq_u8(v1,v2);
418+
#endif
419+
}
420+
#endif/* ! USE_NO_SIMD */
421+
375422
#endif/* SIMD_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp