ExtendedAtomicOps-arm64.h

#define ASM_DMB_ISH "dmb ish\n\t"

#if defined(__ARM_ARCH_7S__)
//  this is sufficient for Swift processors
#   define ASM_REL "dmb ishst\n\t"
#else
#   define ASM_REL "dmb ish\n\t"
#endif

static inline void atomic_pause()
{
}

static inline void atomic_thread_fence(memory_order_relaxed_t)
{
}

static inline void atomic_thread_fence(memory_order_acquire_t)
{
    __asm__ __volatile__ ("dmb ld\n\t" : : : "memory");
}

static inline void atomic_thread_fence(memory_order_release_t)
{
    __asm__ __volatile__ (ASM_REL : : : "memory");
}

static inline void atomic_thread_fence(memory_order_acq_rel_t)
{
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
}

static inline void atomic_thread_fence(int /* memory_order_seq_cst_t */)
{
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
}
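
// Illustrative usage sketch (not part of the original header): these standalone fences are meant
// to be paired with plain or relaxed accesses. The helper names and variables below are
// hypothetical; only atomic_thread_fence and the memory_order_* tags come from this file.
static inline void example_publish(int* data, volatile int* ready, int value)
{
    *data = value;                              // write the payload
    atomic_thread_fence(memory_order_release);  // ASM_REL: payload becomes visible before the flag
    *ready = 1;                                 // raise the flag
}

static inline bool example_try_consume(const int* data, const volatile int* ready, int* out)
{
    if (*ready == 0)
        return false;                           // nothing published yet
    atomic_thread_fence(memory_order_acquire);  // "dmb ld": flag read ordered before payload read
    *out = *data;                               // payload is now safe to read
    return true;
}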
#define ATOMIC_LOAD(opc) \
    atomic_word res; \
    __asm__ __volatile__ \
    ( \
        opc "   %0, %1\n\t" \
        : "=r" (res) \
        : "m" (*p) \
    ); \
    return res;

/*
 *  int support
 */
static inline int atomic_load_explicit(const volatile int* p, memory_order_relaxed_t)
{
    int res;
    __asm__ __volatile__
    (
        "ldr    %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}

static inline int atomic_load_explicit(const volatile int* p, memory_order_acquire_t)
{
    int res;
    __asm__ __volatile__
    (
        "ldar   %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}

static inline int atomic_load_explicit(const volatile int* p, int /* memory_order_seq_cst_t */)
{
    int res;
    __asm__ __volatile__
    (
        "ldar   %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}
/*
 *  native word support
 */

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_relaxed_t)
{
    ATOMIC_LOAD("ldr")
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_acquire_t)
{
    ATOMIC_LOAD("ldar")
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, int /* memory_order_seq_cst_t */)
{
    ATOMIC_LOAD("ldar")
}

#define ATOMIC_STORE(opc) \
    __asm__ __volatile__ \
    ( \
        opc "   %1, %0\n\t" \
        : "=m" (*p) \
        : "r" (v) \
        : "memory" \
    );

/*
 *  int support
 */
static inline void atomic_store_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    __asm__ __volatile__
    (
        "str    %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile int* p, int v, memory_order_release_t)
{
    __asm__ __volatile__
    (
        "stlr   %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    __asm__ __volatile__
    (
        "stlr   %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}
/*
 *  native word support
 */

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_STORE("str")
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_STORE("stlr")
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_STORE("stlr")
}
#define ATOMIC_XCHG(LD, ST) \
    atomic_word res; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        "0:\n\t" \
        LD "    %2, [%4]\n\t" \
        ST "    %w0, %3, [%4]\n\t" \
        "cbnz   %w0, 0b\n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (v), "r" (p) \
        : "memory" \
    ); \
    return res;

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_XCHG("ldxr", "stxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_XCHG("ldaxr", "stxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_XCHG("ldxr", "stlxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_XCHG("ldaxr", "stlxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_XCHG("ldaxr", "stlxr")
}
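
// Illustrative usage sketch (not part of the original header): a minimal test-and-set spinlock
// built from the exchange above. 'example_spin_lock'/'example_spin_unlock' are hypothetical helpers.
static inline void example_spin_lock(volatile atomic_word* lock)
{
    // acquire ordering: the critical section cannot be reordered above taking the lock
    while (atomic_exchange_explicit(lock, (atomic_word)1, memory_order_acquire) != 0)
        atomic_pause();
}

static inline void example_spin_unlock(volatile atomic_word* lock)
{
    // release ordering: writes in the critical section are visible before the lock is seen as free
    atomic_store_explicit(lock, (atomic_word)0, memory_order_release);
}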
// atomic_compare_exchange_weak_explicit: may fail spuriously even if *p == *oldval,
// so callers retry in a loop (see the usage sketch after these overloads)
#define ATOMIC_CMP_XCHG(LD, ST) \
    atomic_word res; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        LD "    %2, [%4]\n\t" \
        "cmp    %2, %5\n\t" \
        "b.ne   1f\n\t" \
        ST "    %w0, %3, [%4]\n" \
        "1:\n\t" \
        "clrex\n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval), "0" (1) /* preset success to 1 so a failed compare returns false */ \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return success == 0;
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}
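
// Illustrative usage sketch (not part of the original header): the typical retry loop around the
// weak CAS. A spurious failure simply refreshes 'cur' (the CAS writes the observed value back into
// *oldval) and the loop tries again. 'example_fetch_max' is a hypothetical helper, not part of this API.
static inline atomic_word example_fetch_max(volatile atomic_word* p, atomic_word v)
{
    atomic_word cur = atomic_load_explicit(p, memory_order_relaxed);
    while (cur < v)
    {
        if (atomic_compare_exchange_weak_explicit(p, &cur, v, memory_order_relaxed, memory_order_relaxed))
            break;  // installed v; otherwise 'cur' now holds the freshly observed value and we retry
    }
    return cur;
}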
// atomic_compare_exchange_strong_explicit: retries the store on spurious failure and
// returns false only if *p != *oldval
#undef ATOMIC_CMP_XCHG
#define ATOMIC_CMP_XCHG(LD, ST) \
    atomic_word res; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        "0:\n\t" \
        LD "    %2, [%4]\n\t" \
        "cmp    %2, %5\n\t" \
        "b.ne   1f\n\t" \
        ST "    %w0, %3, [%4]\n" \
        "cbnz   %w0, 0b\n\t" \
        "1:\n\t" \
        "clrex\n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval), "0" (1) /* preset success to 1 so a failed compare returns false */ \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return success == 0;
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG("ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG("ldaxr", "stlxr")
}
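
// Illustrative usage sketch (not part of the original header): with the strong variant no retry loop
// is needed at the call site; a false return means the compare genuinely failed. 'example_try_claim'
// is a hypothetical helper.
static inline bool example_try_claim(volatile atomic_word* slot, atomic_word owner)
{
    atomic_word expected = 0;   // claim only if the slot is still unowned
    return atomic_compare_exchange_strong_explicit(slot, &expected, owner, memory_order_acquire, memory_order_relaxed);
}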
#define ATOMIC_PFIX_int "%w"
#define ATOMIC_PFIX_atomic_word "%"
#define ATOMIC_PFIX(WORD) ATOMIC_PFIX_##WORD

#define ATOMIC_OP(WORD, LD, ST, OP) \
    long long res, tmp; \
    int success; \
    __asm__ __volatile__ \
    ( \
        "0:\n\t" \
        LD "    " ATOMIC_PFIX(WORD) "2, [%5]\n\t" \
        OP "    " ATOMIC_PFIX(WORD) "3, " ATOMIC_PFIX(WORD) "2, " ATOMIC_PFIX(WORD) "4\n\t" \
        ST "    %w0, " ATOMIC_PFIX(WORD) "3, [%5]\n" \
        "cbnz   %w0, 0b\n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res), "=&r" (tmp) \
        : "Ir" ((long long) v), "r" (p) \
        : "cc", "memory" \
    ); \
    return (WORD) res;
static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    ATOMIC_OP(int, "ldxr", "stxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_acquire_t)
{
    ATOMIC_OP(int, "ldaxr", "stxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_release_t)
{
    ATOMIC_OP(int, "ldxr", "stlxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_acq_rel_t)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stlxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "add")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    ATOMIC_OP(int, "ldxr", "stxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_acquire_t)
{
    ATOMIC_OP(int, "ldaxr", "stxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_release_t)
{
    ATOMIC_OP(int, "ldxr", "stlxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_acq_rel_t)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stlxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "sub")
}
/*
 *  extensions
 */

static inline void atomic_retain(volatile int* p)
{
    atomic_fetch_add_explicit(p, 1, memory_order_relaxed);
}

static inline bool atomic_release(volatile int* p)
{
    bool res = atomic_fetch_sub_explicit(p, 1, memory_order_release) == 1;
    if (res)
    {
        atomic_thread_fence(memory_order_acquire);
    }
    return res;
}
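
// Illustrative usage sketch (not part of the original header): atomic_retain/atomic_release as the
// backbone of an intrusive reference count. The release-ordered decrement publishes all prior writes
// to the object, and the acquire fence taken inside atomic_release (only by the final releaser)
// orders them before destruction. 'ExampleRefCounted' is a hypothetical type.
struct ExampleRefCounted
{
    int refCount;    // starts at 1 when the object is created

    void Retain()  { atomic_retain(&refCount); }
    void Release() { if (atomic_release(&refCount)) delete this; }  // true only for the last reference
};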
/*
 *  double word
 */

// Note: the only way to get an atomic 128-bit memory access on ARM64 is an ldxp/stxp pair in a retry
// loop (a lone ldxp or stxp is not guaranteed to be single-copy atomic). See the usage sketch at the
// end of this file for the typical pointer-plus-counter application.
static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_relaxed_t)
{
    atomic_word2 v;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%1, %2, [%3]\n\t"
        "stxp\t%w0, %1, %2, [%3]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=&r" (v.lo), "=&r" (v.hi)
        : "r" (p)
    );
    return v;
}

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_acquire_t)
{
    atomic_word2 v;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%1, %2, [%3]\n\t"
        "stxp\t%w0, %1, %2, [%3]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=&r" (v.lo), "=&r" (v.hi)
        : "r" (p)
    );
    return v;
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_relaxed_t)
{
    atomic_word lo;
    atomic_word hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%6]\n\t"
        "stxp\t%w0, %4, %5, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (lo), "=&r" (hi)
        : "r" (v.lo), "r" (v.hi), "r" (p)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_release_t)
{
    atomic_word lo;
    atomic_word hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%6]\n\t"
        "stlxp\t%w0, %4, %5, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (lo), "=&r" (hi)
        : "r" (v.lo), "r" (v.hi), "r" (p)
        : "memory"
    );
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 val, memory_order_acq_rel_t)
{
    atomic_word2 oldval;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%6]\n\t"
        "stlxp\t%w0, %5, %4, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval.lo), "=&r" (oldval.hi)
        : "r" (val.hi), "r" (val.lo), "r" (p)
        : "memory"
    );
    return oldval;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_release_t, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stlxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, int /* memory_order_acq_rel_t */, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stlxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}
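
// Illustrative usage sketch (not part of the original header): the usual reason for a double-word CAS
// is updating a pointer together with a version counter so that a recycled pointer value cannot be
// mistaken for the old one (the ABA problem). 'ExampleNode', 'example_push' and the lo/hi layout choice
// are hypothetical; only atomic_word2 and the atomics above come from this file.
struct ExampleNode { ExampleNode* next; };

static inline void example_push(volatile atomic_word2* top, ExampleNode* node)
{
    atomic_word2 cur = atomic_load_explicit(top, memory_order_relaxed);
    atomic_word2 desired;
    do
    {
        node->next = (ExampleNode*)cur.lo;   // lo holds the current head pointer
        desired.lo = (atomic_word)node;      // new head
        desired.hi = cur.hi + 1;             // hi holds a version counter, bumped on every update
    }
    while (!atomic_compare_exchange_strong_explicit(top, &cur, desired, memory_order_release, memory_order_relaxed));
}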