NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit9f225e9

committed

Introduce helper SIMD functions for small byte arrays

vector8_min - helper for emulating ">=" semantics

vector8_highbit_mask - used to turn the result of a vector comparison into a bitmask

Masahiko Sawada

Reviewed by Nathan Bossart, with additional adjustments by me

Discussion: https://postgr.es/m/CAFBsxsHbBm_M22gLBO%2BAZT4mfMq3L_oX3wdKZxjeNnT7fHsYMQ%40mail.gmail.com

1 parent 60c0782, commit 9f225e9 (Copy full SHA for 9f225e9)

File tree

1 file changed

+47

-0

lines changed

src/include/port
- simd.h

1 file changed

+47

-0

lines changed

`‎src/include/port/simd.h`

Lines changed: 47 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -79,6 +79,7 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c);`
`79`	`79`	`static inline bool vector8_is_highbit_set(const Vector8 v);`
`80`	`80`	`#ifndef USE_NO_SIMD`
`81`	`81`	`static inline bool vector32_is_highbit_set(const Vector32 v);`
	`82`	`+static inline uint32 vector8_highbit_mask(const Vector8 v);`
`82`	`83`	`#endif`
`83`	`84`
`84`	`85`	`/* arithmetic operations */`
`@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);`
`96`	`97`	`*/`
`97`	`98`	`#ifndef USE_NO_SIMD`
`98`	`99`	`static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);`
	`100`	`+static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);`
`99`	`101`	`static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);`
`100`	`102`	`#endif`
`101`	`103`
`@@ -299,6 +301,36 @@ vector32_is_highbit_set(const Vector32 v)`
`299`	`301`	`}`
`300`	`302`	`#endif /* ! USE_NO_SIMD */`
`301`	`303`
	`304`	`+/*`
	`305`	`+ * Return a bitmask formed from the high-bit of each element.`
	`306`	`+ */`
	`307`	`+#ifndef USE_NO_SIMD`
	`308`	`+static inline uint32`
	`309`	`+vector8_highbit_mask(const Vector8 v)`
	`310`	`+{`
	`311`	`+#ifdef USE_SSE2`
	`312`	`+return (uint32) _mm_movemask_epi8(v);`
	`313`	`+#elif defined(USE_NEON)`
	`314`	`+/*`
	`315`	`+ * Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but that`
	`316`	`+ * returns a uint64, making it inconvenient to combine mask values from`
	`317`	`+ * multiple vectors.`
	`318`	`+ */`
	`319`	`+static const uint8 mask[16] = {`
	`320`	`+1 << 0, 1 << 1, 1 << 2, 1 << 3,`
	`321`	`+1 << 4, 1 << 5, 1 << 6, 1 << 7,`
	`322`	`+1 << 0, 1 << 1, 1 << 2, 1 << 3,`
	`323`	`+1 << 4, 1 << 5, 1 << 6, 1 << 7,`
	`324`	`+};`
	`325`	`+`
	`326`	`+uint8x16_t masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8(v, 7));`
	`327`	`+uint8x16_t maskedhi = vextq_u8(masked, masked, 8);`
	`328`	`+`
	`329`	`+return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));`
	`330`	`+#endif`
	`331`	`+}`
	`332`	`+#endif /* ! USE_NO_SIMD */`
	`333`	`+`
`302`	`334`	`/*`
`303`	`335`	`* Return the bitwise OR of the inputs`
`304`	`336`	`*/`
`@@ -372,4 +404,19 @@ vector32_eq(const Vector32 v1, const Vector32 v2)`
`372`	`404`	`}`
`373`	`405`	`#endif /* ! USE_NO_SIMD */`
`374`	`406`
	`407`	`+/*`
	`408`	`+ * Given two vectors, return a vector with the minimum element of each.`
	`409`	`+ */`
	`410`	`+#ifndef USE_NO_SIMD`
	`411`	`+static inline Vector8`
	`412`	`+vector8_min(const Vector8 v1, const Vector8 v2)`
	`413`	`+{`
	`414`	`+#ifdef USE_SSE2`
	`415`	`+return _mm_min_epu8(v1, v2);`
	`416`	`+#elif defined(USE_NEON)`
	`417`	`+return vminq_u8(v1, v2);`
	`418`	`+#endif`
	`419`	`+}`
	`420`	`+#endif /* ! USE_NO_SIMD */`
	`421`	`+`
`375`	`422`	`#endif /* SIMD_H */`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit9f225e9

File tree

1 file changed

1 file changed

`‎src/include/port/simd.h`

0 commit comments