ARGOBOTS  dce6e727ffc4ca5b3ffc04cb9517c6689be51ec5
abtd_asm_int128_cas.h
Go to the documentation of this file.
1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3  * See COPYRIGHT in top-level directory.
4  */
5 
6 #ifndef ABTD_ASM_INT128_CAS_H_INCLUDED
7 #define ABTD_ASM_INT128_CAS_H_INCLUDED
8 
9 #include <stdint.h>
10 
/*
 * Weak 128-bit compare-and-swap.
 *
 * Attempts to replace *var with newv if *var currently equals oldv.
 *
 * var:  address of the 128-bit value to update.  NOTE(review): the x86-64
 *       cmpxchg16b instruction requires a 16-byte aligned operand; callers
 *       are presumably expected to guarantee this on every architecture.
 * oldv: value *var is expected to hold.
 * newv: value to store on a match.
 *
 * Returns non-zero if newv was stored and 0 otherwise.  On the LL/SC paths
 * (aarch64, ppc64) a failed store-conditional is reported as 0 without
 * retrying, so the caller must be prepared to retry ("weak" semantics).
 */
static inline int ABTD_asm_bool_cas_weak_int128(__int128 *var, __int128 oldv,
                                                __int128 newv)
{
#if defined(__x86_64__)

    /*
     * Use a compare-and-swap instruction.
     * See src/lockfree/x86-64.h in https://github.com/ARM-software/progress64/
     */
    union u128_union {
        struct {
            uint64_t lo, hi;
        } s;
        __int128 ui;
    };
    union u128_union cmp, with;
    cmp.ui = oldv;
    with.ui = newv;
    char result;
    /* cmpxchg16b compares rdx:rax with the memory operand and, if they are
     * equal, stores rcx:rbx and sets ZF; setz captures ZF in result. */
    __asm__ __volatile__("lock cmpxchg16b %1\n"
                         "setz %0"
                         : "=&q"(result), "+m"(*var), "+d"(cmp.s.hi),
                           "+a"(cmp.s.lo)
                         : "c"(with.s.hi), "b"(with.s.lo)
                         : "memory", "cc");
    return !!result;

#elif defined(__aarch64__)

#if 0
    /*
     * Use a compare-and-swap instruction.
     * See src/lockfree/aarch64.h in https://github.com/ARM-software/progress64/
     *
     * "cc" is listed as a clobber because it is not 100% certain that these
     * instructions leave the status flags untouched.  Since the following
     * version has not been tested on a real machine, it is disabled.
     */
    __int128 prev;
#if __GNUC__ >= 9
    /* This version needs further testing. */
    /* prev is the in/out operand: it carries the expected value in and the
     * value observed in memory out, so oldv stays intact for the final
     * comparison. */
    prev = oldv;
    __asm__ __volatile__("caspal %0, %H0, %1, %H1, [%2]"
                         : "+r"(prev)
                         : "r"(newv), "r"(var)
                         : "memory", "cc");
#else
    __asm__ __volatile__("" ::: "memory");
    register uint64_t x0 __asm__("x0") = (uint64_t)oldv;
    register uint64_t x1 __asm__("x1") = (uint64_t)(oldv >> 64);
    register uint64_t x2 __asm__("x2") = (uint64_t)newv;
    register uint64_t x3 __asm__("x3") = (uint64_t)(newv >> 64);
    __asm__ __volatile__("caspal x0, %[old2], %[newv1], %[newv2], [%[v]]"
                         : [old1] "+r"(x0), [old2] "+r"(x1)
                         : [newv1] "r"(x2), [newv2] "r"(x3), [v] "r"(var)
                         : "memory", "cc");
    prev = x0 | ((__int128)x1 << 64);
#endif
    return oldv == prev;
#else
    /*
     * Use exclusive load and store instructions (LL/SC).
     * See src/lockfree/aarch64.h in https://github.com/ARM-software/progress64/
     */
    __int128 prev;
    __asm__ __volatile__("ldaxp %0, %H0, [%1]"
                         : "=&r"(prev)
                         : "r"(var)
                         : "memory");
    if (prev != oldv) {
        /* Already rewritten. */
        return 0;
    }
    /* stlxp writes 0 to ret when the exclusive store succeeded. */
    uint32_t ret;
    __asm__ __volatile__("stlxp %w0, %1, %H1, [%2]"
                         : "=&r"(ret)
                         : "r"(newv), "r"(var)
                         : "memory");
    return !ret;
#endif

#elif defined(__ppc64__) || defined(__PPC64__)

    /* Use "reserve-indexed" load and store (LL/SC) */
    int ret = 0;
    /* prev0 and newv0 must be even-indexed registers; prev1 and newv1 are
     * pinned to the odd registers that follow them so that each pair forms
     * one 128-bit operand. */
    register volatile uint64_t prev0 __asm__("r10");
    register volatile uint64_t prev1 __asm__("r11");
    register volatile uint64_t newv0 __asm__("r8") = (newv >> 64);
    register volatile uint64_t newv1 __asm__("r9") = newv;
    uint64_t oldv0 = (oldv >> 64);
    uint64_t oldv1 = oldv;
    /* prev0/prev1 are written by lqarx before they are read, so they are
     * pure (early-clobber) outputs.  ret stays 0 unless the conditional
     * store succeeds, hence it is read-write.  newv1 is never named in the
     * template but must be listed so that r9 holds the low half for
     * stqcx. */
    __asm__ __volatile__("\n"
                         "\tlwsync\n"
                         "\tlqarx %[pv0], 0, %[ptr]\n"
                         "\tcmpd %[pv0], %[ov0]\n"
                         "\tbne 1f\n"
                         "\tcmpd %[pv1], %[ov1]\n"
                         "\tbne 1f\n"
                         "\tstqcx. %[nv0], 0, %[ptr]\n"
                         "\tbne 1f\n"
                         "\tli %[ret], 1\n"
                         "1:\n"
                         "\tisync\n"
                         : [pv0] "=&r"(prev0), [pv1] "=&r"(prev1),
                           [ret] "+&r"(ret)
                         : [ptr] "r"(var), [ov0] "r"(oldv0), [ov1] "r"(oldv1),
                           [nv0] "r"(newv0), [nv1] "r"(newv1)
                         : "memory", "cc");
    return ret;

#else

#error "Argobots does not support 128-bit CAS for this architecture."

#endif
}
128 
129 #endif /* ABTD_ASM_INT128_CAS_H_INCLUDED */