|
55 | 55 | *on Alpha TAS() will "fail" if interrupted. Therefore a retry loop must
|
56 | 56 | *always be used, even if you are certain the lock is free.
|
57 | 57 | *
|
58 |     | - *Another caution for users of these macros is that it is the caller's
59 |     | - *responsibility to ensure that the compiler doesn't re-order accesses
60 |     | - *to shared memory to precede the actual lock acquisition, or follow the
61 |     | - *lock release. Typically we handle this by using volatile-qualified
62 |     | - *pointers to refer to both the spinlock itself and the shared data
63 |     | - *structure being accessed within the spinlocked critical section.
64 |     | - *That fixes it because compilers are not allowed to re-order accesses
65 |     | - *to volatile objects relative to other such accesses.
| 58 | + *It is the responsibility of these macros to make sure that the compiler |
| 59 | + *does not re-order accesses to shared memory to precede the actual lock |
| 60 | + *acquisition, or follow the lock release. Prior to PostgreSQL 9.5, this |
| 61 | + *was the caller's responsibility, which meant that callers had to use |
| 62 | + *volatile-qualified pointers to refer to both the spinlock itself and the |
| 63 | + *shared data being accessed within the spinlocked critical section. This |
| 64 | + *was notationally awkward, easy to forget (and thus error-prone), and |
| 65 | + *prevented some useful compiler optimizations. For these reasons, we |
| 66 | + *now require that the macros themselves prevent compiler re-ordering, |
| 67 | + *so that the caller doesn't need to take special precautions. |
66 | 68 | *
|
67 | 69 | *On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
|
68 | 70 | *S_UNLOCK() macros must further include hardware-level memory fence
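To illustrate the new comment above: with the macros themselves acting as compiler barriers, callers no longer need the volatile-qualified-pointer idiom. The sketch below is illustrative only and is not part of the patch; the struct, field, and function names are hypothetical, and it assumes storage/s_lock.h is included.

#include "storage/s_lock.h"

typedef struct
{
	slock_t		mutex;			/* protects counter */
	int			counter;
} SharedCounter;

/* Pre-9.5 idiom: the caller forced ordering via volatile-qualified pointers */
static void
increment_old_style(SharedCounter *sc)
{
	volatile SharedCounter *vsc = sc;

	S_LOCK(&vsc->mutex);
	vsc->counter++;
	S_UNLOCK(&vsc->mutex);
}

/* With barrier-enforcing macros, plain accesses inside the critical section are safe */
static void
increment_new_style(SharedCounter *sc)
{
	S_LOCK(&sc->mutex);
	sc->counter++;
	S_UNLOCK(&sc->mutex);
}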
|
@@ -399,9 +401,9 @@ tas(volatile slock_t *lock)
|
399 | 401 | #if defined(__sparcv7)
|
400 | 402 | /*
|
401 | 403 | * No stbar or membar available, luckily no actually produced hardware
|
402 |     | - * requires a barrier.
| 404 | + * requires a barrier. We fall through to the default gcc definition of |
| 405 | + * S_UNLOCK in this case. |
403 | 406 | */
|
404 |     | -#define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
405 | 407 | #elif __sparcv8
|
406 | 408 | /* stbar is available (and required for both PSO, RMO), membar isn't */
|
407 | 409 | #define S_UNLOCK(lock) \
|
@@ -484,14 +486,14 @@ tas(volatile slock_t *lock)
|
484 | 486 | #define S_UNLOCK(lock) \
|
485 | 487 | do \
|
486 | 488 | { \
|
487 |     | -__asm__ __volatile__ ("lwsync \n"); \
| 489 | +__asm__ __volatile__ ("lwsync \n" ::: "memory"); \ |
488 | 490 | *((volatile slock_t *) (lock)) = 0; \
|
489 | 491 | } while (0)
|
490 | 492 | #else
|
491 | 493 | #define S_UNLOCK(lock) \
|
492 | 494 | do \
|
493 | 495 | { \
|
494 |     | -__asm__ __volatile__ ("sync \n"); \
| 496 | +__asm__ __volatile__ ("sync \n" ::: "memory"); \ |
495 | 497 | *((volatile slock_t *) (lock)) = 0; \
|
496 | 498 | } while (0)
|
497 | 499 | #endif /* USE_PPC_LWSYNC */
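The ::: "memory" clobbers added in this hunk are what turn these asm statements into compiler barriers in addition to CPU fences: without the clobber, gcc is free to keep protected data in registers or to move non-volatile stores across the asm. A minimal, architecture-neutral sketch of the difference (illustrative only; the names are hypothetical, and the empty asm template stands in for lwsync/sync so it compiles on any gcc target):

static int	shared_flag;	/* hypothetical datum protected by the lock */

static void
release_without_clobber(volatile int *lock)
{
	shared_flag = 1;
	__asm__ __volatile__("");				/* no clobber: gcc may still sink the store below */
	*lock = 0;
}

static void
release_with_clobber(volatile int *lock)
{
	shared_flag = 1;
	__asm__ __volatile__("" ::: "memory");	/* clobber: pending stores must be emitted first */
	*lock = 0;
}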
|
|
599 | 601 | " .set noreorder \n" \
|
600 | 602 | " .set nomacro \n" \
|
601 | 603 | " sync \n" \
|
602 |     | -" .set pop "); \
    | 604 | +" .set pop " \
    | 605 | +: \
    | 606 | +: : "memory"); \
603 | 607 | *((volatile slock_t *) (lock)) = 0; \
|
604 | 608 | } while (0)
|
605 | 609 |
|
@@ -657,6 +661,23 @@ tas(volatile slock_t *lock)
|
657 | 661 | typedef unsigned char slock_t;
|
658 | 662 | #endif
|
659 | 663 |
|
| 664 | +/* |
| 665 | + * Note that this implementation is unsafe for any platform that can speculate |
| 666 | + * a memory access (either load or store) after a following store. That |
| 667 | + * happens not to be possible on x86 and most legacy architectures (some are
| 668 | + * single-processor!), but many modern systems have weaker memory ordering. |
| 669 | + * Those that do must define their own version of S_UNLOCK() rather than relying
| 670 | + * on this one. |
| 671 | + */ |
| 672 | +#if !defined(S_UNLOCK) |
| 673 | +#if defined(__INTEL_COMPILER) |
| 674 | +#define S_UNLOCK(lock) \
| 675 | +do { __memory_barrier(); *(lock) = 0; } while (0) |
| 676 | +#else |
| 677 | +#define S_UNLOCK(lock) \
| 678 | +do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0) |
| 679 | +#endif |
| 680 | +#endif |
660 | 681 |
|
661 | 682 | #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
|
662 | 683 |
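As the comment above says, a platform with weak memory ordering cannot rely on the plain store-plus-compiler-barrier fallback and must supply its own S_UNLOCK. A hypothetical sketch of such an override (not how the patch handles any real port, which uses per-architecture asm; it assumes the gcc 4.7+/clang __atomic builtins are available):

#if defined(MY_WEAKLY_ORDERED_ARCH)		/* hypothetical guard */
#define S_UNLOCK(lock) \
	__atomic_store_n((lock), 0, __ATOMIC_RELEASE)	/* compiler barrier + hardware release fence */
#endif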
|
@@ -730,9 +751,13 @@ tas(volatile slock_t *lock)
|
730 | 751 | return (lockval == 0);
|
731 | 752 | }
|
732 | 753 |
|
733 |     | -#endif /* __GNUC__ */
| 754 | +#define S_UNLOCK(lock) \
| 755 | +do { \ |
| 756 | +__asm__ __volatile__("" : : : "memory"); \ |
| 757 | +*TAS_ACTIVE_WORD(lock) = -1; \ |
| 758 | +} while (0) |
734 | 759 |
|
735 |     | -#define S_UNLOCK(lock) (*TAS_ACTIVE_WORD(lock) = -1)
| 760 | +#endif /* __GNUC__ */
736 | 761 |
|
737 | 762 | #define S_INIT_LOCK(lock) \
|
738 | 763 | do { \
|
@@ -770,6 +795,8 @@ typedef unsigned int slock_t;
|
770 | 795 | #define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
|
771 | 796 | /* On IA64, it's a win to use a non-locking test before the xchg proper */
|
772 | 797 | #define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock))
|
| 798 | +#define S_UNLOCK(lock) \
| 799 | +do { _Asm_sched_fence(); (*(lock)) = 0; } while (0)
773 | 800 |
|
774 | 801 | #endif /* HPUX on IA64, non gcc */
|
775 | 802 |
|
@@ -832,6 +859,12 @@ spin_delay(void)
|
832 | 859 | }
|
833 | 860 | #endif
|
834 | 861 |
|
| 862 | +#include <intrin.h>
| 863 | +#pragma intrinsic(_ReadWriteBarrier) |
| 864 | + |
| 865 | +#define S_UNLOCK(lock) \
| 866 | +do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
| 867 | + |
835 | 868 | #endif
|
836 | 869 |
|
837 | 870 |
|
@@ -882,7 +915,25 @@ extern int tas_sema(volatile slock_t *lock);
|
882 | 915 | #endif /* S_LOCK_FREE */
|
883 | 916 |
|
884 | 917 | #if !defined(S_UNLOCK)
|
885 |     | -#define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
| 918 | +/* |
| 919 | + * Our default implementation of S_UNLOCK is essentially *(lock) = 0. This |
| 920 | + * is unsafe if the platform can speculate a memory access (either load or |
| 921 | + * store) after a following store; platforms where this is possible must |
| 922 | + * define their own S_UNLOCK. But CPU reordering is not the only concern: |
| 923 | + * if we simply defined S_UNLOCK() as an inline macro, the compiler might |
| 924 | + * reorder instructions from inside the critical section to occur after the |
| 925 | + * lock release. Since the compiler probably can't know what the external |
| 926 | + * function s_unlock is doing, putting the same logic there should be adequate. |
| 927 | + * A sufficiently-smart globally optimizing compiler could break that |
| 928 | + * assumption, though, and the cost of a function call for every spinlock |
| 929 | + * release may hurt performance significantly, so we use this implementation |
| 930 | + * only for platforms where we don't know of a suitable intrinsic. For the |
| 931 | + * most part, those are relatively obscure platform/compiler combinations to |
| 932 | + * which the PostgreSQL project does not have access. |
| 933 | + */ |
| 934 | +#define USE_DEFAULT_S_UNLOCK
| 935 | +extern void s_unlock(volatile slock_t *lock);
| 936 | +#define S_UNLOCK(lock) s_unlock(lock)
886 | 937 | #endif /* S_UNLOCK */
|
887 | 938 |
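For reference, the out-of-line fallback declared above needs a matching definition elsewhere (presumably in s_lock.c). A sketch of roughly what it could look like, shown only to illustrate the comment above, not quoted from the patch:

#ifdef USE_DEFAULT_S_UNLOCK
void
s_unlock(volatile slock_t *lock)
{
	/*
	 * Because this lives in a separate compilation unit, the compiler in the
	 * caller cannot move critical-section accesses past the function call,
	 * so the store below effectively releases the lock last.
	 */
	*lock = 0;
}
#endif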
|
888 | 939 | #if !defined(S_INIT_LOCK)
|
|