ExtendedAtomicOps-arm.h

#if defined(_MSC_VER)
#   include "os/Win32/WindowsHeaders.h"
#   include <intrin.h>
#else
#   define ASM_DMB_ISH "dmb ish\n\t"
#   if defined(__ARM_ARCH_7S__)
        // this is sufficient for Swift processors
#       define ASM_REL "dmb ishst\n\t"
#   else
#       define ASM_REL "dmb ish\n\t"
#   endif
#   define ASM_CLREX "clrex\n\t"
#   define ASM_ISB "isb\n\t"
#   define ASM_LABEL(i) #i ":\n\t"
#endif

static inline void atomic_pause()
{
}

static inline void atomic_thread_fence(memory_order_relaxed_t)
{
}

static inline void atomic_thread_fence(memory_order_release_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
#else
    __asm__ __volatile__ (ASM_REL : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acquire_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
#else
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
#endif
}

static inline void atomic_thread_fence(memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
#else
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
#endif
}

static inline void atomic_thread_fence(int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
#else
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
#endif
}

#define ATOMIC_LOAD(PRE, POST) \
    atomic_word res; \
    __asm__ __volatile__ \
    ( \
        PRE \
        "ldr %0, %1\n\t" \
        POST \
        : "=r" (res) \
        : "m" (*p) \
        : "memory" \
    ); \
    return res;

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return (atomic_word)__iso_volatile_load32((const volatile __int32*)p);
#else
    ATOMIC_LOAD("", "")
#endif
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    atomic_word res = (atomic_word)__iso_volatile_load32((const volatile __int32*)p);
    __dmb(_ARM_BARRIER_ISH);
    return res;
#else
    ATOMIC_LOAD("", ASM_DMB_ISH)
#endif
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    atomic_word res = (atomic_word)__iso_volatile_load32((const volatile __int32*)p);
    __dmb(_ARM_BARRIER_ISH);
    return res;
#else
    ATOMIC_LOAD("", ASM_DMB_ISH)
#endif
}

#define ATOMIC_STORE(PRE, POST) \
    __asm__ __volatile__ \
    ( \
        PRE \
        "str %1, %0\n\t" \
        POST \
        : "=m" (*p) \
        : "r" (v) \
        : "memory" \
    );

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    __iso_volatile_store32((volatile __int32*)p, (__int32)v);
#else
    ATOMIC_STORE("", "")
#endif
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    __iso_volatile_store32((volatile __int32*)p, (__int32)v);
#else
    ATOMIC_STORE(ASM_REL, "")
#endif
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    __iso_volatile_store32((volatile __int32*)p, (__int32)v);
    __dmb(_ARM_BARRIER_ISH);
#else
    ATOMIC_STORE(ASM_REL, ASM_DMB_ISH)
#endif
}
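
// Illustrative usage sketch, not part of the original header: publishing data from one
// thread to another with the explicit store/load operations above. The ExamplePublishSlot
// type and example_* helpers are hypothetical names introduced only for this sketch.
typedef struct ExamplePublishSlot
{
    atomic_word payload;        // plain data, written before the flag is released
    volatile atomic_word ready; // publication flag, 0 = empty, 1 = published
} ExamplePublishSlot;

static inline void example_publish(ExamplePublishSlot* slot, atomic_word value)
{
    slot->payload = value;
    // release store: the payload write above cannot be reordered past this flag update
    atomic_store_explicit(&slot->ready, 1, memory_order_release);
}

static inline bool example_try_consume(ExamplePublishSlot* slot, atomic_word* out)
{
    // acquire load pairs with the release store in example_publish
    if (atomic_load_explicit(&slot->ready, memory_order_acquire) == 0)
        return false;
    *out = slot->payload;
    return true;
}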
#define ATOMIC_XCHG(PRE, POST) \
    atomic_word res; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        PRE \
        ASM_LABEL (0) \
        "ldrex %2, [%4]\n\t" \
        "strex %0, %3, [%4]\n\t" \
        "teq %0, #0\n\t" \
        "bne 0b\n\t" \
        POST \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (v), "r" (p) \
        : "cc", "memory" \
    ); \
    return res;

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchange_nf((long volatile*)p, (long)v);
#else
    ATOMIC_XCHG("", "")
#endif
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    // _InterlockedExchange_rel is documented by Microsoft, but it doesn't seem to be defined
    __dmb(_ARM_BARRIER_ISH);
    return (atomic_word)_InterlockedExchange_nf((long volatile*)p, (long)v);
#else
    ATOMIC_XCHG(ASM_REL, "")
#endif
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchange_acq((long volatile*)p, (long)v);
#else
    ATOMIC_XCHG("", ASM_ISB)
#endif
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    return (atomic_word)_InterlockedExchange_acq((long volatile*)p, (long)v);
#else
    ATOMIC_XCHG(ASM_REL, ASM_ISB)
#endif
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    // _InterlockedExchange_rel is documented by Microsoft, but it doesn't seem to be defined
    __dmb(_ARM_BARRIER_ISH);
    atomic_word res = (atomic_word)_InterlockedExchange_nf((long volatile*)p, (long)v);
    __dmb(_ARM_BARRIER_ISH);
    return res;
#else
    ATOMIC_XCHG(ASM_REL, ASM_DMB_ISH)
#endif
}
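
// Illustrative usage sketch, not part of the original header: a minimal test-and-set spin lock
// built on the exchange/store operations above. ExampleSpinLock and the example_* helpers are
// hypothetical names.
typedef struct ExampleSpinLock
{
    volatile atomic_word held; // 0 = free, 1 = held
} ExampleSpinLock;

static inline void example_spin_lock(ExampleSpinLock* lock)
{
    // acquire ordering keeps the critical section from being reordered before the lock is taken
    while (atomic_exchange_explicit(&lock->held, 1, memory_order_acquire) != 0)
        atomic_pause();
}

static inline void example_spin_unlock(ExampleSpinLock* lock)
{
    // release ordering makes writes done inside the critical section visible before the lock is freed
    atomic_store_explicit(&lock->held, 0, memory_order_release);
}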
#if !defined(_MSC_VER)
// atomic_compare_exchange_weak_explicit: can fail spuriously even if *p == *oldval
#define ATOMIC_CMP_XCHG(PRE, POST) \
    atomic_word res; \
    /* strex-style flag: stays 1 unless the store was actually performed (strex writes 0 on success) */ \
    atomic_word success = 1; \
    __asm__ __volatile__ \
    ( \
        PRE \
        "ldrex %2, [%4]\n\t" \
        "teq %2, %5\n\t" \
        "it eq\n\t" \
        "strexeq %0, %3, [%4]\n\t" \
        POST \
        : "+r" (success), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval) \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return success == 0;
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("", "")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(ASM_REL, "")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("", "teq %0, #0\n\tbne 1f\n\t" ASM_ISB ASM_LABEL(1))
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(ASM_REL, "teq %0, #0\n\tbne 1f\n\t" ASM_ISB ASM_LABEL(1))
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(ASM_REL, "teq %0, #0\n\tbne 1f\n\t" ASM_DMB_ISH ASM_LABEL(1))
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG(ASM_REL, "")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG("", ASM_ISB)
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG(ASM_REL, ASM_ISB)
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG(ASM_REL, ASM_DMB_ISH)
}

#endif
// atomic_compare_exchange_strong_explicit: does loop and only returns false if *p != *oldval
#undef ATOMIC_CMP_XCHG
#define ATOMIC_CMP_XCHG(PRE, POST) \
    atomic_word res; \
    /* strex-style flag: starts at 1 via the "0" (1) input, becomes 0 only after a successful store */ \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        PRE \
        ASM_LABEL (0) \
        "ldrex %2, [%4]\n\t" \
        "teq %2, %5\n\t" \
        "bne 1f\n\t" \
        "strex %0, %3, [%4]\n\t" \
        "teq %0, #0\n\t" \
        "bne 0b\n\t" \
        POST \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval), "0" (1) \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return success == 0;
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    long tmp = _InterlockedCompareExchange_nf((long volatile*)p, (long)newval, (long)*oldval);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    ATOMIC_CMP_XCHG("", ASM_LABEL(1) ASM_CLREX)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
#if defined(_MSC_VER)
    long tmp = _InterlockedCompareExchange_rel((long volatile*)p, (long)newval, (long)*oldval);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_LABEL(1) ASM_CLREX)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    long tmp = _InterlockedCompareExchange_acq((long volatile*)p, (long)newval, (long)*oldval);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    ATOMIC_CMP_XCHG("", ASM_LABEL(1) ASM_CLREX ASM_ISB)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    long tmp = _InterlockedCompareExchange_acq((long volatile*)p, (long)newval, (long)*oldval);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_LABEL(1) ASM_CLREX ASM_ISB)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    long tmp = _InterlockedCompareExchange_rel((long volatile*)p, (long)newval, (long)*oldval);
    __dmb(_ARM_BARRIER_ISH);
    return *oldval == tmp ? true : (*oldval = tmp, false);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_LABEL(1) ASM_CLREX ASM_DMB_ISH)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_release, memory_order_release);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_LABEL(1) ASM_CLREX)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acquire, memory_order_acquire);
#else
    ATOMIC_CMP_XCHG("", ASM_ISB ASM_LABEL(1) ASM_CLREX)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acq_rel, memory_order_acq_rel);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_ISB ASM_LABEL(1) ASM_CLREX)
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_seq_cst, memory_order_seq_cst);
#else
    ATOMIC_CMP_XCHG(ASM_REL, ASM_DMB_ISH ASM_LABEL(1) ASM_CLREX)
#endif
}

#if defined(_MSC_VER)
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_relaxed, memory_order_relaxed);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_release, memory_order_relaxed);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acquire, memory_order_relaxed);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acq_rel, memory_order_relaxed);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_seq_cst, memory_order_relaxed);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_release, memory_order_release);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acquire, memory_order_acquire);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_acq_rel, memory_order_acq_rel);
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, memory_order_seq_cst, memory_order_seq_cst);
}
#endif
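
// Illustrative usage sketches, not part of the original header; the example_* helpers are
// hypothetical. The weak compare-exchange may fail spuriously even when the values match,
// so it belongs in a retry loop; the strong form only reports failure on a real mismatch,
// which makes it suitable for one-shot claims.
static inline atomic_word example_fetch_max(volatile atomic_word* p, atomic_word v)
{
    atomic_word cur = atomic_load_explicit(p, memory_order_relaxed);
    while (cur < v &&
           !atomic_compare_exchange_weak_explicit(p, &cur, v, memory_order_relaxed, memory_order_relaxed))
    {
        // on failure cur has been refreshed with the currently stored value; re-test and retry
    }
    return cur;
}

static inline bool example_claim(volatile atomic_word* owner, atomic_word self)
{
    atomic_word expected = 0; // 0 means "unowned"
    return atomic_compare_exchange_strong_explicit(owner, &expected, self,
                                                   memory_order_acquire, memory_order_relaxed);
}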
#define ATOMIC_OP(PRE, OP, POST) \
    atomic_word res, tmp; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        PRE \
        ASM_LABEL (0) \
        "ldrex %2, [%5]\n\t" \
        OP " %3, %2, %4\n\t" \
        "strex %0, %3, [%5]\n\t" \
        "teq %0, #0\n\t" \
        "bne 0b\n\t" \
        POST \
        : "=&r" (success), "+m" (*p), "=&r" (res), "=&r" (tmp) \
        : "Ir" (v), "r" (p) \
        : "cc", "memory" \
    ); \
    return res;

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchangeAdd_nf((volatile long*)p, (long)v);
#else
    ATOMIC_OP("", "add", "")
#endif
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchangeAdd_rel((volatile long*)p, (long)v);
#else
    ATOMIC_OP(ASM_REL, "add", "")
#endif
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    return (atomic_word)_InterlockedExchangeAdd_acq((volatile long*)p, (long)v);
#else
    ATOMIC_OP("", "add", ASM_ISB)
#endif
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    return (atomic_word)_InterlockedExchangeAdd_acq((volatile long*)p, (long)v);
#else
    ATOMIC_OP(ASM_REL, "add", ASM_ISB)
#endif
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    long oldval = _InterlockedExchangeAdd_rel((volatile long*)p, (long)v);
    __dmb(_ARM_BARRIER_ISH);
    return (atomic_word)oldval;
#else
    ATOMIC_OP(ASM_REL, "add", ASM_DMB_ISH)
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    return atomic_fetch_add_explicit(p, -v, memory_order_relaxed);
#else
    ATOMIC_OP("", "sub", "")
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
#if defined(_MSC_VER)
    return atomic_fetch_add_explicit(p, -v, memory_order_release);
#else
    ATOMIC_OP(ASM_REL, "sub", "")
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    return atomic_fetch_add_explicit(p, -v, memory_order_acquire);
#else
    ATOMIC_OP("", "sub", ASM_ISB)
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    return atomic_fetch_add_explicit(p, -v, memory_order_acq_rel);
#else
    ATOMIC_OP(ASM_REL, "sub", ASM_ISB)
#endif
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
#if defined(_MSC_VER)
    return atomic_fetch_add_explicit(p, -v, memory_order_seq_cst);
#else
    ATOMIC_OP(ASM_REL, "sub", ASM_DMB_ISH)
#endif
}
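
// Illustrative usage sketch, not part of the original header: handing out monotonically
// increasing ticket numbers with the fetch_add operations above. The example_* names are
// hypothetical; relaxed ordering suffices for the ticket value itself, while the retire side
// uses release so work finished under a ticket is visible to whoever reads the completed count.
static inline atomic_word example_take_ticket(volatile atomic_word* next_ticket)
{
    return atomic_fetch_add_explicit(next_ticket, 1, memory_order_relaxed);
}

static inline void example_retire_ticket(volatile atomic_word* completed)
{
    atomic_fetch_add_explicit(completed, 1, memory_order_release);
}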
/*
 * extensions
 */

static inline void atomic_retain(volatile int* p)
{
#if defined(_MSC_VER)
    _InterlockedIncrement_nf((volatile long*)p);
#else
    atomic_fetch_add_explicit(p, 1, memory_order_relaxed);
#endif
}

static inline bool atomic_release(volatile int* p)
{
#if defined(_MSC_VER)
    // _InterlockedDecrement returns the resulting decremented value
    bool res = _InterlockedDecrement_rel((volatile long*)p) == 0;
    if (res)
    {
        __dmb(_ARM_BARRIER_ISH);
    }
#else
    bool res = atomic_fetch_sub_explicit(p, 1, memory_order_release) == 1;
    if (res)
    {
        atomic_thread_fence(memory_order_acquire);
    }
#endif
    return res;
}
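
// Illustrative usage sketch, not part of the original header: a reference-counted object built
// on atomic_retain/atomic_release. ExampleRefCounted and the example_* helpers are hypothetical.
typedef struct ExampleRefCounted
{
    int refcount; // starts at 1, owned by the creating thread
} ExampleRefCounted;

static inline void example_acquire_ref(ExampleRefCounted* obj)
{
    atomic_retain(&obj->refcount);
}

static inline void example_release_ref(ExampleRefCounted* obj, void (*destroy)(ExampleRefCounted*))
{
    // atomic_release returns true only for the final reference; its acquire fence on that path
    // makes all prior writes to the object visible before it is destroyed
    if (atomic_release(&obj->refcount))
        destroy(obj);
}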
/*
 * double word
 */

// Note: the only way to get atomic 64-bit memory accesses on ARM is to use ldrexd/strexd with a loop
// (ldrd and strd instructions are not guaranteed to appear atomic)

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    // atomic_word2 r; r.v = __iso_volatile_load64 ((volatile __int64*) p); return r;
    atomic_word2 r;
    r.v = _InterlockedCompareExchange64_nf((volatile __int64*)p, (__int64)0, (__int64)0);
    return r;
#else
    register atomic_word lo __asm__ ("r2");
    register atomic_word hi __asm__ ("r3");
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_LABEL(0)
        "ldrexd\t%1, %2, [%3]\n\t"
        "strexd\t%0, %1, %2, [%3]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        : "=&r" (success), "=&r" (lo), "=&r" (hi)
        : "r" (p)
        : "cc", "r2", "r3"
    );
    atomic_word2 w;
    w.lo = lo;
    w.hi = hi;
    return w;
#endif
}

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_acquire_t)
{
#if defined(_MSC_VER)
    atomic_word2 r;
    r.v = _InterlockedCompareExchange64_acq((volatile __int64*)p, (__int64)0, (__int64)0);
    return r;
#else
    register atomic_word lo __asm__ ("r2");
    register atomic_word hi __asm__ ("r3");
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_LABEL(0)
        "ldrexd\t%1, %2, [%3]\n\t"
        "strexd\t%0, %1, %2, [%3]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_ISB
        : "=&r" (success), "=&r" (lo), "=&r" (hi)
        : "r" (p)
        : "cc", "memory", "r2", "r3"
    );
    atomic_word2 w;
    w.lo = lo;
    w.hi = hi;
    return w;
#endif
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    atomic_word2 w, x;
    w = v;
    x = v;
    do
    {
        w.v = _InterlockedCompareExchange64_nf((volatile __int64*)p, x.v, w.v);
    }
    while (w.v != x.v);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = v.lo;
    register atomic_word hi __asm__ ("r1") = v.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%6]\n\t"
        "strexd\t%0, %4, %5, [%6]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (l), "=&r" (h)
        : "r" (lo), "r" (hi), "r" (p)
        : "cc", "memory", "r2", "r3"
    );
#endif
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_release_t)
{
#if defined(_MSC_VER)
    atomic_word2 w, x;
    w = v;
    x = v;
    do
    {
        w.v = _InterlockedCompareExchange64_rel((volatile __int64*)p, x.v, w.v);
    }
    while (w.v != x.v);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = v.lo;
    register atomic_word hi __asm__ ("r1") = v.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_REL
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%6]\n\t"
        "strexd\t%0, %4, %5, [%6]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (l), "=&r" (h)
        : "r" (lo), "r" (hi), "r" (p)
        : "cc", "memory", "r2", "r3"
    );
#endif
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 val, memory_order_acq_rel_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    atomic_word2 w;
    w.v = _InterlockedExchange64_acq((__int64 volatile*)p, (__int64)val.v);
    return w;
#else
    register atomic_word l __asm__ ("r0");
    register atomic_word h __asm__ ("r1");
    register atomic_word lo __asm__ ("r2") = val.lo;
    register atomic_word hi __asm__ ("r3") = val.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_REL
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%6]\n\t"
        "strexd\t%0, %5, %4, [%6]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_ISB
        : "=&r" (success), "=m" (*p), "=&r" (l), "=&r" (h)
        : "r" (hi), "r" (lo), "r" (p)
        : "cc", "memory", "r0", "r1", "r3"
    );
    val.lo = l;
    val.hi = h;
    return val;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acquire_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    __int64 tmp = _InterlockedCompareExchange64_acq((volatile __int64*)p, newval.v, oldval->v);
    return oldval->v == tmp ? true : (oldval->v = tmp, false);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = newval.lo;
    register atomic_word hi __asm__ ("r1") = newval.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%8]\n\t"
        "teq\t%3, %5\n\t"
        "it\t\teq\n\t"
        "teqeq\t%2, %4\n\t"
        "bne\t1f\n\t"
        "strexd\t%0, %6, %7, [%8]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_ISB
        ASM_LABEL(1)
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (l), "=&r" (h)
        : "r" (oldval->lo), "r" (oldval->hi), "r" (lo), "r" (hi), "r" (p), "0" (1)
        : "cc", "memory", "r2", "r3"
    );
    if (success != 0)
    {
        oldval->lo = l;
        oldval->hi = h;
    }
    return success == 0;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_release_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    __int64 tmp = _InterlockedCompareExchange64_rel((volatile __int64*)p, newval.v, oldval->v);
    return oldval->v == tmp ? true : (oldval->v = tmp, false);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = newval.lo;
    register atomic_word hi __asm__ ("r1") = newval.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_REL
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%8]\n\t"
        "teq\t%3, %5\n\t"
        "it\t\teq\n\t"
        "teqeq\t%2, %4\n\t"
        "bne\t1f\n\t"
        "strexd\t%0, %6, %7, [%8]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_LABEL(1)
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (l), "=&r" (h)
        : "r" (oldval->lo), "r" (oldval->hi), "r" (lo), "r" (hi), "r" (p), "0" (1)
        : "cc", "memory", "r2", "r3"
    );
    if (success != 0)
    {
        oldval->lo = l;
        oldval->hi = h;
    }
    return success == 0;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    __dmb(_ARM_BARRIER_ISH);
    __int64 tmp = _InterlockedCompareExchange64_acq((volatile __int64*)p, newval.v, oldval->v);
    return oldval->v == tmp ? true : (oldval->v = tmp, false);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = newval.lo;
    register atomic_word hi __asm__ ("r1") = newval.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_REL
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%8]\n\t"
        "teq\t%3, %5\n\t"
        "it\t\teq\n\t"
        "teqeq\t%2, %4\n\t"
        "bne\t1f\n\t"
        "strexd\t%0, %6, %7, [%8]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_ISB
        ASM_LABEL(1)
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (l), "=&r" (h)
        : "r" (oldval->lo), "r" (oldval->hi), "r" (lo), "r" (hi), "r" (p), "0" (1)
        : "cc", "memory", "r2", "r3"
    );
    if (success != 0)
    {
        oldval->lo = l;
        oldval->hi = h;
    }
    return success == 0;
#endif
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_seq_cst_t, memory_order_relaxed_t)
{
#if defined(_MSC_VER)
    __int64 tmp = _InterlockedCompareExchange64_rel((volatile __int64*)p, newval.v, oldval->v);
    __dmb(_ARM_BARRIER_ISH);
    return oldval->v == tmp ? true : (oldval->v = tmp, false);
#else
    register atomic_word l __asm__ ("r2");
    register atomic_word h __asm__ ("r3");
    register atomic_word lo __asm__ ("r0") = newval.lo;
    register atomic_word hi __asm__ ("r1") = newval.hi;
    atomic_word success;
    __asm__ __volatile__
    (
        ASM_REL
        ASM_LABEL(0)
        "ldrexd\t%2, %3, [%8]\n\t"
        "teq\t%3, %5\n\t"
        "it\t\teq\n\t"
        "teqeq\t%2, %4\n\t"
        "bne\t1f\n\t"
        "strexd\t%0, %6, %7, [%8]\n\t"
        "teq\t%0, #0\n\t"
        "bne\t0b\n\t"
        ASM_DMB_ISH
        ASM_LABEL(1)
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (l), "=&r" (h)
        : "r" (oldval->lo), "r" (oldval->hi), "r" (lo), "r" (hi), "r" (p), "0" (1)
        : "cc", "memory", "r2", "r3"
    );
    if (success != 0)
    {
        oldval->lo = l;
        oldval->hi = h;
    }
    return success == 0;
#endif
}
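
// Illustrative usage sketch, not part of the original header: the double-word compare-exchange
// above is typically used for a pointer+version pair so that a lock-free pop is not fooled by
// ABA. ExampleNode/example_pop are hypothetical; the sketch assumes atomic_word2 exposes the
// lo/hi fields used by the GCC paths above, and that nodes remain readable while a concurrent
// pop may still dereference them.
typedef struct ExampleNode
{
    struct ExampleNode* next;
} ExampleNode;

static inline ExampleNode* example_pop(volatile atomic_word2* head)
{
    atomic_word2 cur = atomic_load_explicit(head, memory_order_acquire);
    for (;;)
    {
        ExampleNode* node = (ExampleNode*)cur.lo;
        if (node == 0)
            return 0;
        atomic_word2 next;
        next.lo = (atomic_word)node->next; // proposed new head pointer
        next.hi = cur.hi + 1;              // version tag defeats ABA
        if (atomic_compare_exchange_strong_explicit(head, &cur, next,
                                                    memory_order_acquire, memory_order_relaxed))
            return node;
        // cur now holds the freshly observed head; retry
    }
}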