NotificationsYou must be signed in to change notification settings
Fork5.1k
Star18.7k

Commit f8f4afe

committed

Optimize vector8_has_le() on AArch64.

Presently, the SIMD implementation of this function uses unsigned saturating subtraction to find bytes less than or equal to the given value, which is a workaround for the lack of unsigned comparison instructions on some architectures. However, Neon offers vminvq_u8(), which returns the minimum (unsigned) value in the vector. This commit adds a Neon-specific implementation that uses vminvq_u8() to optimize vector8_has_le() on AArch64. In passing, adjust the SSE2 implementation to use vector8_min() and vector8_eq() to find values less than or equal to the given value. This was the only use of vector8_ssub(), so it has been removed. Reviewed-by: John Naylor <johncnaylorls@gmail.com> Discussion: https://postgr.es/m/aNHDNDSHleq0ogC_%40nathan

1 parent 74b41f5, commit f8f4afe (copy full SHA for f8f4afe)

File tree

1 file changed

+10

-27

lines changed

src/include/port
- simd.h

1 file changed

+10

-27

lines changed

`‎src/include/port/simd.h‎`

Lines changed: 10 additions & 27 deletions

Original file line number	Diff line number	Diff line change
`@@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v);`
`86`	`86`	`staticinlineVector8vector8_or(constVector8v1,constVector8v2);`
`87`	`87`	`#ifndefUSE_NO_SIMD`
`88`	`88`	`staticinlineVector32vector32_or(constVector32v1,constVector32v2);`
`89`		`-staticinlineVector8vector8_ssub(constVector8v1,constVector8v2);`
`90`	`89`	`#endif`
`91`	`90`
`92`	`91`	`/*`
`@@ -213,6 +212,10 @@ static inline bool`
`213`	`212`	`vector8_has_le(constVector8v,constuint8c)`
`214`	`213`	`{`
`215`	`214`	`boolresult= false;`
	`215`	`+#ifdefUSE_SSE2`
	`216`	`+Vector8umin;`
	`217`	`+Vector8cmpe;`
	`218`	`+#endif`
`216`	`219`
`217`	`220`	`/* pre-compute the result for assert checking */`
`218`	`221`	`#ifdefUSE_ASSERT_CHECKING`
`@@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c)`
`250`	`253`	`}`
`251`	`254`	`}`
`252`	`255`	`}`
`253`		`-#else`
`254`		`-`
`255`		`-/*`
`256`		`- * Use saturating subtraction to find bytes <= c, which will present as`
`257`		`- * NUL bytes. This approach is a workaround for the lack of unsigned`
`258`		`- * comparison instructions on some architectures.`
`259`		`- */`
`260`		`-result=vector8_has_zero(vector8_ssub(v,vector8_broadcast(c)));`
	`256`	`+#elif defined(USE_SSE2)`
	`257`	`+umin=vector8_min(v,vector8_broadcast(c));`
	`258`	`+cmpe=vector8_eq(umin,v);`
	`259`	`+result=vector8_is_highbit_set(cmpe);`
	`260`	`+#elif defined(USE_NEON)`
	`261`	`+result=vminvq_u8(v) <=c;`
`261`	`262`	`#endif`
`262`	`263`
`263`	`264`	`Assert(assert_result==result);`
`@@ -358,24 +359,6 @@ vector32_or(const Vector32 v1, const Vector32 v2)`
`358`	`359`	`}`
`359`	`360`	`#endif/* ! USE_NO_SIMD */`
`360`	`361`
`361`		`-/*`
`362`		`- * Return the result of subtracting the respective elements of the input`
`363`		`- * vectors using saturation (i.e., if the operation would yield a value less`
`364`		`- * than zero, zero is returned instead). For more information on saturation`
`365`		`- * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic`
`366`		`- */`
`367`		`-#ifndefUSE_NO_SIMD`
`368`		`-staticinlineVector8`
`369`		`-vector8_ssub(constVector8v1,constVector8v2)`
`370`		`-{`
`371`		`-#ifdefUSE_SSE2`
`372`		`-return_mm_subs_epu8(v1,v2);`
`373`		`-#elif defined(USE_NEON)`
`374`		`-returnvqsubq_u8(v1,v2);`
`375`		`-#endif`
`376`		`-}`
`377`		`-#endif/* ! USE_NO_SIMD */`
`378`		`-`
`379`	`362`	`/*`
`380`	`363`	`* Return a vector with all bits set in each lane where the corresponding`
`381`	`364`	`* lanes in the inputs are equal.`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitf8f4afe

File tree

1 file changed

1 file changed

`‎src/include/port/simd.h‎`

0 commit comments