|
55 | 55 | *on Alpha TAS() will "fail" if interrupted. Therefore a retry loop must
|
56 | 56 | *always be used, even if you are certain the lock is free.
|
57 | 57 | *
|
58 |     | - *Another caution for users of these macros is that it is the caller's
59 |     | - *responsibility to ensure that the compiler doesn't re-order accesses
60 |     | - *to shared memory to precede the actual lock acquisition, or follow the
61 |     | - *lock release. Typically we handle this by using volatile-qualified
62 |     | - *pointers to refer to both the spinlock itself and the shared data
63 |     | - *structure being accessed within the spinlocked critical section.
64 |     | - *That fixes it because compilers are not allowed to re-order accesses
65 |     | - *to volatile objects relative to other such accesses.
| 58 | + *It is the responsibility of these macros to make sure that the compiler |
| 59 | + *does not re-order accesses to shared memory to precede the actual lock |
| 60 | + *acquisition, or follow the lock release. Prior to PostgreSQL 9.5, this |
| 61 | + *was the caller's responsibility, which meant that callers had to use |
| 62 | + *volatile-qualified pointers to refer to both the spinlock itself and the |
| 63 | + *shared data being accessed within the spinlocked critical section. This |
| 64 | + *was notationally awkward, easy to forget (and thus error-prone), and |
| 65 | + *prevented some useful compiler optimizations. For these reasons, we |
| 66 | + *now require that the macros themselves prevent compiler re-ordering, |
| 67 | + *so that the caller doesn't need to take special precautions. |
66 | 68 | *
|
67 | 69 | *On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
|
68 | 70 | *S_UNLOCK() macros must further include hardware-level memory fence
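To illustrate the new comment above: with the macros themselves acting as compiler barriers, callers no longer need the volatile-qualified-pointer idiom. The sketch below is illustrative only and is not part of the patch; the struct, field, and function names are hypothetical, and it assumes storage/s_lock.h is included.

#include "storage/s_lock.h"

typedef struct
{
	slock_t		mutex;			/* protects counter */
	int			counter;
} SharedCounter;

/* Pre-9.5 idiom: the caller forced ordering via volatile-qualified pointers */
static void
increment_old_style(SharedCounter *sc)
{
	volatile SharedCounter *vsc = sc;

	S_LOCK(&vsc->mutex);
	vsc->counter++;
	S_UNLOCK(&vsc->mutex);
}

/* With barrier-enforcing macros, plain accesses inside the critical section are safe */
static void
increment_new_style(SharedCounter *sc)
{
	S_LOCK(&sc->mutex);
	sc->counter++;
	S_UNLOCK(&sc->mutex);
}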
|
@@ -399,9 +401,9 @@ tas(volatile slock_t *lock)
|
399 | 401 | #if defined(__sparcv7)
|
400 | 402 | /*
|
401 | 403 | * No stbar or membar available, luckily no actually produced hardware
|
402 |     | - * requires a barrier.
| 404 | + * requires a barrier. We fall through to the default gcc definition of |
| 405 | + * S_UNLOCK in this case. |
403 | 406 | */
|
404 |     | -#define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
405 | 407 | #elif __sparcv8
|
406 | 408 | /* stbar is available (and required for both PSO, RMO), membar isn't */
|
407 | 409 | #define S_UNLOCK(lock) \
|
@@ -484,14 +486,14 @@ tas(volatile slock_t *lock)
|
484 | 486 | #define S_UNLOCK(lock) \
|
485 | 487 | do \
|
486 | 488 | { \
|
487 |     | -__asm__ __volatile__ ("lwsync \n"); \
| 489 | +__asm__ __volatile__ ("lwsync \n" ::: "memory"); \ |
488 | 490 | *((volatile slock_t *) (lock)) = 0; \
|
489 | 491 | } while (0)
|
490 | 492 | #else
|
491 | 493 | #define S_UNLOCK(lock) \
|
492 | 494 | do \
|
493 | 495 | { \
|
494 |     | -__asm__ __volatile__ ("sync \n"); \
| 496 | +__asm__ __volatile__ ("sync \n" ::: "memory"); \ |
495 | 497 | *((volatile slock_t *) (lock)) = 0; \
|
496 | 498 | } while (0)
|
497 | 499 | #endif /* USE_PPC_LWSYNC */
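The ::: "memory" clobbers added in this hunk are what turn these asm statements into compiler barriers in addition to CPU fences: without the clobber, gcc is free to keep protected data in registers or to move non-volatile stores across the asm. A minimal, architecture-neutral sketch of the difference (illustrative only; the names are hypothetical, and the empty asm template stands in for lwsync/sync so it compiles on any gcc target):

static int	shared_flag;	/* hypothetical datum protected by the lock */

static void
release_without_clobber(volatile int *lock)
{
	shared_flag = 1;
	__asm__ __volatile__("");				/* no clobber: gcc may still sink the store below */
	*lock = 0;
}

static void
release_with_clobber(volatile int *lock)
{
	shared_flag = 1;
	__asm__ __volatile__("" ::: "memory");	/* clobber: pending stores must be emitted first */
	*lock = 0;
}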
|
|
599 | 601 | " .set noreorder \n" \
|
600 | 602 | " .set nomacro \n" \
|
601 | 603 | " sync \n" \
|
602 |     | -" .set pop "); \
    | 604 | +" .set pop " \
    | 605 | +: \
    | 606 | +: : "memory"); \
603 | 607 | *((volatile slock_t *) (lock)) = 0; \
|
604 | 608 | } while (0)
|
605 | 609 |
|
@@ -657,6 +661,23 @@ tas(volatile slock_t *lock)
|
657 | 661 | typedef unsigned char slock_t;
|
658 | 662 | #endif
|
659 | 663 |
|
| 664 | +/* |
| 665 | + * Note that this implementation is unsafe for any platform that can speculate |
| 666 | + * a memory access (either load or store) after a following store. That |
| 667 | + * happens not to be possible on x86 and most legacy architectures (some are
| 668 | + * single-processor!), but many modern systems have weaker memory ordering. |
| 669 | + * Those that do must define their own version of S_UNLOCK() rather than relying
| 670 | + * on this one. |
| 671 | + */ |
| 672 | +#if !defined(S_UNLOCK) |
| 673 | +#if defined(__INTEL_COMPILER) |
| 674 | +#define S_UNLOCK(lock) \
| 675 | +do { __memory_barrier(); *(lock) = 0; } while (0) |
| 676 | +#else |
| 677 | +#define S_UNLOCK(lock) \
| 678 | +do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0) |
| 679 | +#endif |
| 680 | +#endif |
660 | 681 |
|
661 | 682 | #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
|
662 | 683 |
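As the comment above says, a platform with weak memory ordering cannot rely on the plain store-plus-compiler-barrier fallback and must supply its own S_UNLOCK. A hypothetical sketch of such an override (not how the patch handles any real port, which uses per-architecture asm; it assumes the gcc 4.7+/clang __atomic builtins are available):

#if defined(MY_WEAKLY_ORDERED_ARCH)		/* hypothetical guard */
#define S_UNLOCK(lock) \
	__atomic_store_n((lock), 0, __ATOMIC_RELEASE)	/* compiler barrier + hardware release fence */
#endif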
|
@@ -730,9 +751,13 @@ tas(volatile slock_t *lock)
|
730 | 751 | return (lockval == 0);
|
731 | 752 | }
|
732 | 753 |
|
733 |     | -#endif /* __GNUC__ */
| 754 | +#define S_UNLOCK(lock) \
| 755 | +do { \ |
| 756 | +__asm__ __volatile__("" : : : "memory"); \ |
| 757 | +*TAS_ACTIVE_WORD(lock) = -1; \ |
| 758 | +} while (0) |
734 | 759 |
|
735 |     | -#define S_UNLOCK(lock) (*TAS_ACTIVE_WORD(lock) = -1)
| 760 | +#endif /* __GNUC__ */
736 | 761 |
|
737 | 762 | #define S_INIT_LOCK(lock) \
|
738 | 763 | do { \
|
@@ -770,6 +795,8 @@ typedef unsigned int slock_t;
|
770 | 795 | #define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
|
771 | 796 | /* On IA64, it's a win to use a non-locking test before the xchg proper */
|
772 | 797 | #define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock))
|
| 798 | +#define S_UNLOCK(lock) \
| 799 | +do { _Asm_sched_fence(); (*(lock)) = 0; } while (0)
773 | 800 |
|
774 | 801 | #endif /* HPUX on IA64, non gcc */
|
775 | 802 |
|
@@ -832,6 +859,12 @@ spin_delay(void)
|
832 | 859 | }
|
833 | 860 | #endif
|
834 | 861 |
|
| 862 | +#include <intrin.h>
| 863 | +#pragma intrinsic(_ReadWriteBarrier) |
| 864 | + |
| 865 | +#define S_UNLOCK(lock) \
| 866 | +do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
| 867 | + |
835 | 868 | #endif
|
836 | 869 |
|
837 | 870 |
|
@@ -882,7 +915,25 @@ extern int tas_sema(volatile slock_t *lock);
|
882 | 915 | #endif /* S_LOCK_FREE */
|
883 | 916 |
|
884 | 917 | #if !defined(S_UNLOCK)
|
885 |     | -#define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
| 918 | +/* |
| 919 | + * Our default implementation of S_UNLOCK is essentially *(lock) = 0. This |
| 920 | + * is unsafe if the platform can speculate a memory access (either load or |
| 921 | + * store) after a following store; platforms where this is possible must |
| 922 | + * define their own S_UNLOCK. But CPU reordering is not the only concern: |
| 923 | + * if we simply defined S_UNLOCK() as an inline macro, the compiler might |
| 924 | + * reorder instructions from inside the critical section to occur after the |
| 925 | + * lock release. Since the compiler probably can't know what the external |
| 926 | + * function s_unlock is doing, putting the same logic there should be adequate. |
| 927 | + * A sufficiently-smart globally optimizing compiler could break that |
| 928 | + * assumption, though, and the cost of a function call for every spinlock |
| 929 | + * release may hurt performance significantly, so we use this implementation |
| 930 | + * only for platforms where we don't know of a suitable intrinsic. For the |
| 931 | + * most part, those are relatively obscure platform/compiler combinations to |
| 932 | + * which the PostgreSQL project does not have access. |
| 933 | + */ |
| 934 | +#define USE_DEFAULT_S_UNLOCK
| 935 | +extern void s_unlock(volatile slock_t *lock);
| 936 | +#define S_UNLOCK(lock) s_unlock(lock)
886 | 937 | #endif /* S_UNLOCK */
|
887 | 938 |
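For reference, the out-of-line fallback declared above needs a matching definition elsewhere (presumably in s_lock.c). A sketch of roughly what it could look like, shown only to illustrate the comment above, not quoted from the patch:

#ifdef USE_DEFAULT_S_UNLOCK
void
s_unlock(volatile slock_t *lock)
{
	/*
	 * Because this lives in a separate compilation unit, the compiler in the
	 * caller cannot move critical-section accesses past the function call,
	 * so the store below effectively releases the lock last.
	 */
	*lock = 0;
}
#endif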
|
888 | 939 | #if !defined(S_INIT_LOCK)
|
|