ExtendedAtomicOps-x86.h

#if defined(_MSC_VER)
#   include "os/Win32/WindowsHeaders.h"
#   include <intrin.h>
#endif

#if defined(__SSE2__)
#   include <emmintrin.h>
#endif

// Spin-wait hint: emits the PAUSE instruction so the CPU relaxes inside busy-wait loops.
static inline void atomic_pause()
{
#if defined(_MSC_VER)
    _mm_pause();
#else
    __asm__ __volatile__ ("rep; nop");  // "rep; nop" is the encoding of PAUSE
#endif
}
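
// Illustrative sketch (not part of the original header): the primitives in this file are
// typically combined into a test-and-test-and-set spin lock roughly like this.
// 'spin_lock', 'spin_lock_acquire' and 'spin_lock_release' are hypothetical names, and the
// memory_order_* constants are assumed to come from the companion atomic-ops headers.
//
//  struct spin_lock { volatile atomic_word state; };    // 0 = free, 1 = held
//
//  static inline void spin_lock_acquire(spin_lock* l)
//  {
//      for (;;)
//      {
//          if (atomic_exchange_explicit(&l->state, 1, memory_order_seq_cst) == 0)
//              return;                                   // lock acquired
//          while (atomic_load_explicit(&l->state, memory_order_acquire) != 0)
//              atomic_pause();                           // relax while someone else holds it
//      }
//  }
//
//  static inline void spin_lock_release(spin_lock* l)
//  {
//      atomic_store_explicit(&l->state, 0, memory_order_release);
//  }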

static inline void atomic_thread_fence(memory_order_relaxed_t)
{
}

// On x86 the hardware already orders ordinary loads and stores strongly enough for
// acquire/release semantics, so these fences only have to prevent compiler reordering.
static inline void atomic_thread_fence(memory_order_release_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acquire_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}

// A sequentially consistent fence needs a real hardware barrier: MFENCE when SSE2 is
// available, otherwise any locked read-modify-write instruction will do.
static inline void atomic_thread_fence(int /* memory_order_seq_cst_t */)
{
#if defined(__SSE2__)
    _mm_mfence();
#elif defined(_MSC_VER)
    volatile LONG tmp;
    _InterlockedOr(&tmp, 0);
#else
    // AT&T syntax uses '$' for immediates ('#' would not assemble)
    __asm__ __volatile__ ("lock orl $0, 0(%%esp)" ::: "cc", "memory");
#endif
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_relaxed_t)
{
    return *p;
}

// Acquire / seq_cst load: on x86 a plain load plus a compiler barrier is sufficient.
static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, int)
{
    atomic_word v;
#if defined(_MSC_VER)
    v = *p;
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("movl %1, %0" : "=r" (v) : "m" (*p) : "memory");
#endif
    return v;
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    *p = v;
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
    *p = v;
#else
    __asm__ __volatile__ ("movl %1, %0" : "=m" (*p) : "r" (v) : "memory");
#endif
}

// A seq_cst store needs the full barrier that a locked exchange provides.
static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word val, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    _InterlockedExchange((volatile LONG*)p, (LONG)val);
#else
    // the lock prefix is implicit for xchg with a memory operand
    __asm__ __volatile__
    (
        /*lock*/ "xchgl %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "memory"
    );
#endif
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word val, int)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchange((volatile LONG*)p, (LONG)val);
#else
    // the lock prefix is implicit for xchg with a memory operand
    __asm__ __volatile__
    (
        /*lock*/ "xchgl %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "memory"
    );
    return val;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word* oldval, atomic_word newval, int, int)
{
#if defined(_MSC_VER)
    atomic_word tmp = (atomic_word)_InterlockedCompareExchange((volatile LONG*)p, (LONG)newval, (LONG)*oldval);
    // on failure, report the value actually observed back through *oldval
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    // CMPXCHG leaves the observed value in EAX, which the "+a" constraint writes back into *oldval.
    char res;
    __asm__ __volatile__
    (
        "lock cmpxchgl %3, %0\n\t"
        "setz %b1"
        : "+m" (*p), "=q" (res), "+a" (*oldval)
        : "r" (newval)
        : "cc", "memory"
    );
    return res != 0;
#endif
}

// x86 compare-exchange never fails spuriously, so the weak form simply forwards to the strong one.
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word* oldval, atomic_word newval, int, int)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_seq_cst, memory_order_seq_cst);
}
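
// Illustrative sketch (not part of the original header): the usual retry loop around the
// compare-exchange above, shown here as an atomic maximum. 'atomic_fetch_max' is a
// hypothetical name; memory_order_relaxed is assumed to come from the companion headers.
//
//  static inline atomic_word atomic_fetch_max(volatile atomic_word* p, atomic_word val)
//  {
//      atomic_word old = atomic_load_explicit(p, memory_order_relaxed);
//      // on failure the CAS stores the freshly observed value into 'old', so the loop
//      // retries with up-to-date data and stops as soon as no update is needed
//      while (old < val
//             && !atomic_compare_exchange_weak_explicit(p, &old, val, memory_order_seq_cst, memory_order_seq_cst))
//      {
//      }
//      return old;
//  }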

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word val, int)
{
#if defined(_MSC_VER)
    return _InterlockedExchangeAdd((LONG volatile*)p, (LONG)val);
#else
    // XADD leaves the previous value of *p in 'val'
    __asm__ __volatile__
    (
        "lock xaddl %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "cc", "memory"
    );
    return val;
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word val, int mo)
{
    return atomic_fetch_add_explicit(p, -val, mo);
}

/*
 *  extensions
 */

static inline void atomic_retain(volatile int* p)
{
#if defined(_MSC_VER)
    _InterlockedIncrement((LONG volatile*)p);
#else
    __asm__
    (
        "lock incl %0\n\t"
        : "+m" (*p)
        :
        : "cc", "memory"
    );
#endif
}

// Decrements the count and returns true when it reaches zero, i.e. when the caller held the last reference.
static inline bool atomic_release(volatile int* p)
{
#if defined(_MSC_VER)
    return _InterlockedDecrement((LONG volatile*)p) == 0;
#else
    bool res;
    __asm__
    (
        "lock decl %0\n\t"
        "setz %b1"
        : "+m" (*p), "=q" (res)
        :
        : "cc", "memory"
    );
    return res;
#endif
}
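
// Illustrative sketch (not part of the original header): one typical use of atomic_retain /
// atomic_release is intrusive reference counting along these lines. 'RefCounted',
// 'acquire_ref', 'release_ref' and 'destroy' are hypothetical names.
//
//  struct RefCounted { volatile int refcount; /* ... payload ... */ };
//
//  static inline void acquire_ref(RefCounted* obj)
//  {
//      atomic_retain(&obj->refcount);          // ++refcount
//  }
//
//  static inline void release_ref(RefCounted* obj)
//  {
//      if (atomic_release(&obj->refcount))     // --refcount; true when it hits zero
//          destroy(obj);                       // stand-in for the real cleanup
//  }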

// double word (64 bit) operations

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, int)
{
    atomic_word2 r;
#if defined(__SSE2__)
    // an aligned 8-byte SSE load/store is atomic on x86
    _mm_store_sd((double*)&r, _mm_load_sd((const double*)p));
#else
    // using the FPU is the only way to do a 64 bit atomic load if SSE is not available
    r.d = p->d;
#endif
    return r;
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, int)
{
#if defined(__SSE2__)
    _mm_store_sd((double*)p, _mm_load_sd((const double*)&v));
#else
    // using the FPU is the only way to do a 64 bit atomic store if SSE is not available
    p->d = v.d;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, int, int)
{
#if defined(_MSC_VER)
    LONGLONG tmp = _InterlockedCompareExchange64((volatile LONGLONG*)p, newval.v, oldval->v);
    return oldval->v == tmp ? true : (oldval->v = tmp, false);
#else
    // CMPXCHG8B compares EDX:EAX against the 8-byte operand and, if equal, stores ECX:EBX into it;
    // on failure the observed value is written back into *oldval through the "+a"/"+d" constraints.
    char res;
    __asm__ __volatile__
    (
        "lock cmpxchg8b %0\n\t"
        "setz %b1\n\t"
        : "+m" (*p), "=q" (res), "+a" (oldval->lo), "+d" (oldval->hi)
        : "b" (newval.lo), "c" (newval.hi)
        : "cc", "memory"
    );
    return res != 0;
#endif
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 newval, int)
{
    // There is no 8-byte xchg on 32-bit x86, so the exchange is emulated with a CAS loop:
    // start from an arbitrary guess and retry until the CAS succeeds; every failed attempt
    // refreshes oldval with the current contents of *p, which is the value to return.
    atomic_word2 oldval;
    oldval.lo = 0;
    oldval.hi = newval.hi - 1;
    while (!atomic_compare_exchange_strong_explicit(p, &oldval, newval, memory_order_seq_cst, memory_order_seq_cst))
        ;
    return oldval;
}
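
// Illustrative sketch (not part of the original header): one common use of the double-word
// compare-exchange is a pointer paired with a version counter, so a lock-free stack can
// detect ABA. 'node', 'tagged_head' and 'tagged_push' are hypothetical names; the lo/hi
// fields of atomic_word2 are used as in the routines above.
//
//  struct node { node* next; };
//  struct tagged_head { volatile atomic_word2 w; };      // lo = top pointer, hi = version
//
//  static inline void tagged_push(tagged_head* h, node* n)
//  {
//      atomic_word2 cur = atomic_load_explicit(&h->w, memory_order_seq_cst);
//      atomic_word2 next;
//      do
//      {
//          n->next = (node*)cur.lo;                      // link the new node to the current top
//          next.lo = (atomic_word)n;
//          next.hi = cur.hi + 1;                         // bump the version on every update
//      }
//      while (!atomic_compare_exchange_strong_explicit(&h->w, &cur, next, memory_order_seq_cst, memory_order_seq_cst));
//  }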