ExtendedAtomicOps-x86-64.h

#if defined(_MSC_VER)
#   include "os/Win32/WindowsHeaders.h"
#   include <intrin.h>
#endif

#if defined(__SSE2__)
#   include <emmintrin.h>
#endif

static inline void atomic_pause()
{
#if defined(_MSC_VER)
    _mm_pause();
#else
    __asm__ __volatile__ ("pause");
#endif
}
static inline void atomic_thread_fence(memory_order_relaxed_t)
{
}

static inline void atomic_thread_fence(memory_order_release_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acquire_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("" : : : "memory");
#endif
}
static inline void atomic_thread_fence(int /* memory_order_seq_cst_t */)
{
#if defined(__SSE2__)
    _mm_mfence();
#elif defined(_MSC_VER)
    volatile LONGLONG tmp;
    _InterlockedOr64(&tmp, 0);
#else
    __asm__ __volatile__ ("lock orl $0, (%%rsp)" ::: "cc", "memory");
#endif
}
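
/*
 * Usage sketch (not part of the original header). Only the seq_cst fence above emits a real
 * barrier instruction (mfence or a locked or to the stack); the acquire/release/acq_rel fences
 * only stop compiler reordering, which is enough on x86-64 because the only reordering the
 * hardware itself performs is letting a later load complete before an earlier store to a
 * different location. A store-load handshake is the classic case that needs the full fence.
 * The names g_flag0/g_flag1 are invented for the example, and the memory_order_* constants are
 * assumed to come from the companion atomic headers, as elsewhere in this file.
 *
 *     static volatile int g_flag0 = 0, g_flag1 = 0;
 *
 *     // Thread 0 (thread 1 mirrors this with the two flags swapped):
 *     atomic_store_explicit(&g_flag0, 1, memory_order_relaxed);
 *     atomic_thread_fence(memory_order_seq_cst);                     // full barrier
 *     if (atomic_load_explicit(&g_flag1, memory_order_acquire) == 0)
 *     {
 *         // the other thread has not raised its flag yet
 *     }
 *
 * Without the fence the flag store may still sit in the store buffer when the load executes,
 * so both threads could observe the other's flag as 0.
 */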
/*
 *  int support
 */

static inline int atomic_load_explicit(const volatile int* p, memory_order_relaxed_t)
{
    return *p;
}
static inline int atomic_load_explicit(const volatile int* p, int)
{
    int v;
#if defined(_MSC_VER)
    v = *p;
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("movl %1, %0" : "=r" (v) : "m" (*p) : "memory");
#endif
    return v;
}

static inline void atomic_store_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    *p = v;
}

static inline void atomic_store_explicit(volatile int* p, int v, memory_order_release_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
    *p = v;
#else
    __asm__ __volatile__ ("movl %1, %0" : "=m" (*p) : "r" (v) : "memory");
#endif
}

static inline void atomic_store_explicit(volatile int* p, int val, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    _InterlockedExchange((volatile LONG*)p, (LONG)val);
#else
    // lock prefix is implicit
    __asm__ __volatile__
    (
        /*lock*/ "xchgl %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "memory"
    );
#endif
}
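
/*
 * Usage sketch (not part of the original header): the release store / acquire load pair above
 * is all that is needed to publish data between threads on x86-64; both compile to plain movs
 * plus a compiler barrier. g_payload, g_ready and the memory_order_* constants are example
 * names assumed from the companion headers.
 *
 *     static int g_payload;
 *     static volatile int g_ready = 0;
 *
 *     // producer
 *     g_payload = 42;
 *     atomic_store_explicit(&g_ready, 1, memory_order_release);
 *
 *     // consumer
 *     if (atomic_load_explicit(&g_ready, memory_order_acquire) != 0)
 *     {
 *         int value = g_payload;   // guaranteed to observe 42
 *     }
 */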
/*
 *  native word support
 */

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_relaxed_t)
{
    return *p;
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, int)
{
    atomic_word v;
#if defined(_MSC_VER)
    v = *p;
    _ReadWriteBarrier();
#else
    __asm__ __volatile__ ("movq %1, %0" : "=r" (v) : "m" (*p) : "memory");
#endif
    return v;
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    *p = v;
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    _ReadWriteBarrier();
    *p = v;
#else
    __asm__ __volatile__ ("movq %1, %0" : "=m" (*p) : "r" (v) : "memory");
#endif
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word val, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    _InterlockedExchange64((volatile LONGLONG*)p, (LONGLONG)val);
#else
    // lock prefix is implicit
    __asm__ __volatile__
    (
        /*lock*/ "xchgq %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "memory"
    );
#endif
}
static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word val, int)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchange64((volatile LONGLONG*)p, (LONGLONG)val);
#else
    // lock prefix is implicit
    __asm__ __volatile__
    (
        /*lock*/ "xchgq %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "memory"
    );
    return val;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word* oldval, atomic_word newval, int, int)
{
#if defined(_MSC_VER)
    atomic_word tmp = (atomic_word)_InterlockedCompareExchange64((volatile LONGLONG*)p, (LONGLONG)newval, (LONGLONG)*oldval);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    char res;
    __asm__ __volatile__
    (
        "lock cmpxchgq %3, %0\n\t"
        "setz %b1"
        : "+m" (*p), "=q" (res), "+a" (*oldval)
        : "r" (newval)
        : "cc", "memory"
    );
    return res != 0;
#endif
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word* oldval, atomic_word newval, int, int)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_seq_cst, memory_order_seq_cst);
}
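
/*
 * Usage sketch (not part of the original header): a minimal test-and-set spinlock built from
 * atomic_exchange_explicit, atomic_pause and the release store. It assumes atomic_word is the
 * native pointer-sized integer and that the memory_order_* constants come from the companion
 * headers; the function names are invented for the example.
 *
 *     static volatile atomic_word g_lock = 0;
 *
 *     static inline void example_lock()
 *     {
 *         while (atomic_exchange_explicit(&g_lock, 1, memory_order_acquire) != 0)
 *             atomic_pause();   // yield pipeline resources to the sibling hyperthread while spinning
 *     }
 *
 *     static inline void example_unlock()
 *     {
 *         atomic_store_explicit(&g_lock, 0, memory_order_release);
 *     }
 */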
static inline int atomic_fetch_add_explicit(volatile int *p, int val, int)
{
#if defined(_MSC_VER)
    return _InterlockedExchangeAdd((LONG volatile*)p, (LONG)val);
#else
    __asm__ __volatile__
    (
        "lock xaddl %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "cc", "memory"
    );
    return val;
#endif
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word *p, atomic_word val, int)
{
#if defined(_MSC_VER)
    return _InterlockedExchangeAdd64((LONGLONG volatile*)p, (LONGLONG)val);
#else
    __asm__ __volatile__
    (
        "lock xaddq %1, %0"
        : "+m" (*p), "+r" (val)
        :
        : "cc", "memory"
    );
    return val;
#endif
}

static inline int atomic_fetch_sub_explicit(volatile int *p, int val, int mo)
{
    return atomic_fetch_add_explicit(p, -val, mo);
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word *p, atomic_word val, int mo)
{
    return atomic_fetch_add_explicit(p, -val, mo);
}
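
/*
 * Usage sketch (not part of the original header): xadd and InterlockedExchangeAdd both return
 * the value the counter held *before* the addition, matching C11 fetch_add semantics, so the
 * result can be used directly as a unique ticket. g_next_id is an example name.
 *
 *     static volatile int g_next_id = 0;
 *
 *     static inline int example_allocate_id()
 *     {
 *         return atomic_fetch_add_explicit(&g_next_id, 1, memory_order_seq_cst);
 *     }
 */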
/*
 *  extensions
 */

static inline void atomic_retain(volatile int *p)
{
#if defined(_MSC_VER)
    _InterlockedIncrement((LONG volatile*)p);
#else
    __asm__
    (
        "lock incl %0\n\t"
        : "+m" (*p)
        :
        : "cc", "memory"
    );
#endif
}

static inline bool atomic_release(volatile int *p)
{
#if defined(_MSC_VER)
    return _InterlockedDecrement((LONG volatile*)p) == 0;
#else
    bool res;
    __asm__
    (
        "lock decl %0\n\t"
        "setz %b1"
        : "+m" (*p), "=q" (res)
        :
        : "cc", "memory"
    );
    return res;
#endif
}
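
/*
 * Usage sketch (not part of the original header): atomic_retain/atomic_release implement the
 * usual intrusive reference count, with atomic_release reporting whether the count reached
 * zero so the caller knows when to destroy the object. The type, field and helper names below
 * (ExampleShared, refcount, example_destroy) are invented for the example.
 *
 *     struct ExampleShared
 *     {
 *         volatile int refcount;
 *         // ... payload ...
 *     };
 *
 *     static inline void example_acquire(ExampleShared* s)
 *     {
 *         atomic_retain(&s->refcount);
 *     }
 *
 *     static inline void example_release(ExampleShared* s)
 *     {
 *         if (atomic_release(&s->refcount))   // true when the count dropped to zero
 *             example_destroy(s);             // last owner tears the object down
 *     }
 */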
/*
 *  double word
 */

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, int, int)
{
#if defined(_MSC_VER)
    return _InterlockedCompareExchange128((volatile LONGLONG*)p, (LONGLONG)newval.hi, (LONGLONG)newval.lo, (LONGLONG*)oldval) != 0;
#else
    char res;
    __asm__ __volatile__
    (
        "lock cmpxchg16b %0\n\t"
        "setz %b1\n\t"
        : "+m" (*p), "=q" (res), "+a" (oldval->lo), "+d" (oldval->hi)
        : "b" (newval.lo), "c" (newval.hi)
        : "cc", "memory"
    );
    return res != 0;
#endif
}

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, int o)
{
    /*
        // cmpxchg16b-based fallback:
        atomic_word2 r = { 0, 0 };
        atomic_word2 c = { 0, 0 };
        atomic_compare_exchange_strong_explicit((volatile atomic_word2*)p, &r, c, o, o);
        return r;
    */
    // SSE2 path: p must be 16-byte aligned for _mm_load_si128.
    atomic_word2 r;
    r.v = _mm_load_si128((const __m128i*)p);
    return r;
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, int o)
{
    /*
        // cmpxchg16b-based fallback:
        atomic_word2 c = v;
        while (!atomic_compare_exchange_strong_explicit(p, &c, v, o, o)) {}
    */
    // SSE2 path: p must be 16-byte aligned for _mm_store_si128.
    _mm_store_si128((__m128i*)&p->v, v.v);
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 newval, int)
{
    // Seed oldval with an arbitrary guess: if it does not match, the CAS refreshes oldval with
    // the current contents and we retry; if it happens to match, the exchange completes at once.
    atomic_word2 oldval;
    oldval.lo = 0;
    oldval.hi = newval.hi - 1;
    while (!atomic_compare_exchange_strong_explicit(p, &oldval, newval, memory_order_seq_cst, memory_order_seq_cst))
        ;
    return oldval;
}
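
/*
 * Usage sketch (not part of the original header): the 128-bit CAS above is what makes
 * version-tagged pointers (the usual ABA defence in lock-free structures) practical on x86-64.
 * It assumes atomic_word2 exposes .lo/.hi as in the code above and that the target is 16-byte
 * aligned; g_head and example_replace_head are invented names.
 *
 *     // g_head.lo holds a pointer-sized value, g_head.hi a monotonically increasing version tag.
 *     static volatile atomic_word2 g_head;
 *
 *     static inline void example_replace_head(atomic_word value)
 *     {
 *         atomic_word2 oldval = atomic_load_explicit(&g_head, memory_order_acquire);
 *         atomic_word2 newval;
 *         do
 *         {
 *             newval.lo = value;
 *             newval.hi = oldval.hi + 1;   // bump the tag so a recycled value never looks unchanged
 *         }
 *         while (!atomic_compare_exchange_strong_explicit(&g_head, &oldval, newval,
 *                                                         memory_order_seq_cst, memory_order_seq_cst));
 *     }
 */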