NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit 82739d4

committed

Use ARM Advanced SIMD (NEON) intrinsics where available

NEON support is required on the Aarch64 architecture for standard implementations. Hardware designers for specialized markets can choose not to support it, but that's true of floating point as well, which we assume is supported. As with x86, some SIMD support is available on 32-bit platforms, but those are not interesting from a performance standpoint and would require an inconvenient runtime check.

Nathan Bossart

Reviewed by John Naylor, Andres Freund, Thomas Munro, and Tom Lane

Discussion: https://www.postgresql.org/message-id/flat/CAFBsxsEyR9JkfbPcDXBRYEfdfC__OkwVGdwEAgY4Rv0cvw35EA%40mail.gmail.com#aba7a64b11503494ffd8dd27067626a9

1 parent f8f19f7 · commit 82739d4 · Copy full SHA for 82739d4

File tree

1 file changed

+37

-3

lines changed

src/include/port
- simd.h

1 file changed

+37

-3

lines changed

`‎src/include/port/simd.h`

Lines changed: 37 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,20 @@`
`33`	`33`	`typedef__m128iVector8;`
`34`	`34`	`typedef__m128iVector32;`
`35`	`35`
	`36`	`+#elif defined(__aarch64__)&& defined(__ARM_NEON)`
	`37`	`+/*`
	`38`	`+ * We use the Neon instructions if the compiler provides access to them (as`
	`39`	`+ * indicated by __ARM_NEON) and we are on aarch64. While Neon support is`
	`40`	`+ * technically optional for aarch64, it appears that all available 64-bit`
	`41`	`+ * hardware does have it. Neon exists in some 32-bit hardware too, but we`
	`42`	`+ * could not realistically use it there without a run-time check, which seems`
	`43`	`+ * not worth the trouble for now.`
	`44`	`+ */`
	`45`	`+#include<arm_neon.h>`
	`46`	`+#defineUSE_NEON`
	`47`	`+typedefuint8x16_tVector8;`
	`48`	`+typedefuint32x4_tVector32;`
	`49`	`+`
`36`	`50`	`#else`
`37`	`51`	`/*`
`38`	`52`	`* If no SIMD instructions are available, we can in some cases emulate vector`
`@@ -90,6 +104,8 @@ vector8_load(Vector8 v, const uint8 s)`
`90`	`104`	`{`
`91`	`105`	`#if defined(USE_SSE2)`
`92`	`106`	`v=_mm_loadu_si128((const__m128i)s);`
	`107`	`+#elif defined(USE_NEON)`
	`108`	`+*v=vld1q_u8(s);`
`93`	`109`	`#else`
`94`	`110`	`memcpy(v,s,sizeof(Vector8));`
`95`	`111`	`#endif`
`@@ -101,6 +117,8 @@ vector32_load(Vector32 v, const uint32 s)`
`101`	`117`	`{`
`102`	`118`	`#ifdefUSE_SSE2`
`103`	`119`	`v=_mm_loadu_si128((const__m128i)s);`
	`120`	`+#elif defined(USE_NEON)`
	`121`	`+*v=vld1q_u32(s);`
`104`	`122`	`#endif`
`105`	`123`	`}`
`106`	`124`	`#endif/* ! USE_NO_SIMD */`
`@@ -113,6 +131,8 @@ vector8_broadcast(const uint8 c)`
`113`	`131`	`{`
`114`	`132`	`#if defined(USE_SSE2)`
`115`	`133`	`return_mm_set1_epi8(c);`
	`134`	`+#elif defined(USE_NEON)`
	`135`	`+returnvdupq_n_u8(c);`
`116`	`136`	`#else`
`117`	`137`	`return ~UINT64CONST(0) /0xFF*c;`
`118`	`138`	`#endif`
`@@ -124,6 +144,8 @@ vector32_broadcast(const uint32 c)`
`124`	`144`	`{`
`125`	`145`	`#ifdefUSE_SSE2`
`126`	`146`	`return_mm_set1_epi32(c);`
	`147`	`+#elif defined(USE_NEON)`
	`148`	`+returnvdupq_n_u32(c);`
`127`	`149`	`#endif`
`128`	`150`	`}`
`129`	`151`	`#endif/* ! USE_NO_SIMD */`
`@@ -153,7 +175,7 @@ vector8_has(const Vector8 v, const uint8 c)`
`153`	`175`	`#if defined(USE_NO_SIMD)`
`154`	`176`	`/* any bytes in v equal to c will evaluate to zero via XOR */`
`155`	`177`	`result=vector8_has_zero(v ^vector8_broadcast(c));`
`156`		`-#elif defined(USE_SSE2)`
	`178`	`+#else`
`157`	`179`	`result=vector8_is_highbit_set(vector8_eq(v,vector8_broadcast(c)));`
`158`	`180`	`#endif`
`159`	`181`
`@@ -173,7 +195,7 @@ vector8_has_zero(const Vector8 v)`
`173`	`195`	`* circular definition.`
`174`	`196`	`*/`
`175`	`197`	`returnvector8_has_le(v,0);`
`176`		`-#elif defined(USE_SSE2)`
	`198`	`+#else`
`177`	`199`	`returnvector8_has(v,0);`
`178`	`200`	`#endif`
`179`	`201`	`}`
`@@ -223,7 +245,7 @@ vector8_has_le(const Vector8 v, const uint8 c)`
`223`	`245`	`}`
`224`	`246`	`}`
`225`	`247`	`}`
`226`		`-#elif defined(USE_SSE2)`
	`248`	`+#else`
`227`	`249`
`228`	`250`	`/*`
`229`	`251`	`* Use saturating subtraction to find bytes <= c, which will present as`
`@@ -245,6 +267,8 @@ vector8_is_highbit_set(const Vector8 v)`
`245`	`267`	`{`
`246`	`268`	`#ifdefUSE_SSE2`
`247`	`269`	`return_mm_movemask_epi8(v)!=0;`
	`270`	`+#elif defined(USE_NEON)`
	`271`	`+returnvmaxvq_u8(v)>0x7F;`
`248`	`272`	`#else`
`249`	`273`	`returnv&vector8_broadcast(0x80);`
`250`	`274`	`#endif`
`@@ -258,6 +282,8 @@ vector8_or(const Vector8 v1, const Vector8 v2)`
`258`	`282`	`{`
`259`	`283`	`#ifdefUSE_SSE2`
`260`	`284`	`return_mm_or_si128(v1,v2);`
	`285`	`+#elif defined(USE_NEON)`
	`286`	`+returnvorrq_u8(v1,v2);`
`261`	`287`	`#else`
`262`	`288`	`returnv1 \|v2;`
`263`	`289`	`#endif`
`@@ -269,6 +295,8 @@ vector32_or(const Vector32 v1, const Vector32 v2)`
`269`	`295`	`{`
`270`	`296`	`#ifdefUSE_SSE2`
`271`	`297`	`return_mm_or_si128(v1,v2);`
	`298`	`+#elif defined(USE_NEON)`
	`299`	`+returnvorrq_u32(v1,v2);`
`272`	`300`	`#endif`
`273`	`301`	`}`
`274`	`302`	`#endif/* ! USE_NO_SIMD */`
`@@ -285,6 +313,8 @@ vector8_ssub(const Vector8 v1, const Vector8 v2)`
`285`	`313`	`{`
`286`	`314`	`#ifdefUSE_SSE2`
`287`	`315`	`return_mm_subs_epu8(v1,v2);`
	`316`	`+#elif defined(USE_NEON)`
	`317`	`+returnvqsubq_u8(v1,v2);`
`288`	`318`	`#endif`
`289`	`319`	`}`
`290`	`320`	`#endif/* ! USE_NO_SIMD */`
`@@ -299,6 +329,8 @@ vector8_eq(const Vector8 v1, const Vector8 v2)`
`299`	`329`	`{`
`300`	`330`	`#ifdefUSE_SSE2`
`301`	`331`	`return_mm_cmpeq_epi8(v1,v2);`
	`332`	`+#elif defined(USE_NEON)`
	`333`	`+returnvceqq_u8(v1,v2);`
`302`	`334`	`#endif`
`303`	`335`	`}`
`304`	`336`	`#endif/* ! USE_NO_SIMD */`
`@@ -309,6 +341,8 @@ vector32_eq(const Vector32 v1, const Vector32 v2)`
`309`	`341`	`{`
`310`	`342`	`#ifdefUSE_SSE2`
`311`	`343`	`return_mm_cmpeq_epi32(v1,v2);`
	`344`	`+#elif defined(USE_NEON)`
	`345`	`+returnvceqq_u32(v1,v2);`
`312`	`346`	`#endif`
`313`	`347`	`}`
`314`	`348`	`#endif/* ! USE_NO_SIMD */`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 82739d4

File tree

1 file changed

1 file changed

`‎src/include/port/simd.h`

0 commit comments