Commit c342f78

arm64: cmpxchg: patch in lse instructions when supported by the CPU
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our cmpxchg primitives so that
the LSE cas instruction is used instead.

Reviewed-by: Catalin Marinas <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
1 parent c8366ba commit c342f78
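The caller-visible contract is unchanged by the patching: cmpxchg() still returns the value that was in memory before the operation, so callers compare the return value with the expected one to tell whether the swap took effect. A minimal caller sketch (try_get() and its refcount are illustrative, not part of this patch):

static bool try_get(unsigned int *refs)
{
	unsigned int old = READ_ONCE(*refs);

	while (old != 0) {		/* treat 0 as "object dead" */
		unsigned int prev = cmpxchg(refs, old, old + 1);

		if (prev == old)	/* the swap happened */
			return true;
		old = prev;		/* lost a race; retry with what we saw */
	}
	return false;
}

Whichever implementation the alternatives framework leaves in place at boot (the LL/SC loop or the LSE cas), the return-value semantics above are identical.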

File tree

4 files changed (+98, -66 lines)

arch/arm64/include/asm/atomic.h

Lines changed: 2 additions & 1 deletion
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 
 #include <asm/barrier.h>
-#include <asm/cmpxchg.h>
 #include <asm/lse.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
@@ -41,6 +40,8 @@
 
 #undef __ARM64_IN_ATOMIC_IMPL
 
+#include <asm/cmpxchg.h>
+
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local
  * strex/ldrex monitor on some implementations. The reason we can use it for

arch/arm64/include/asm/atomic_ll_sc.h

Lines changed: 38 additions & 0 deletions
@@ -215,4 +215,42 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
+#define __CMPXCHG_CASE(w, sz, name, mb, cl)				\
+__LL_SC_INLINE unsigned long						\
+__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
+				     unsigned long old,			\
+				     unsigned long new))		\
+{									\
+	unsigned long tmp, oldval;					\
+									\
+	asm volatile(							\
+	"	" #mb "\n"						\
+	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
+	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
+	"	cbnz	%" #w "[tmp], 2f\n"				\
+	"	stxr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"		\
+	"	cbnz	%w[tmp], 1b\n"					\
+	"	" #mb "\n"						\
+	"	mov	%" #w "[oldval], %" #w "[old]\n"		\
+	"2:"								\
+	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
+	  [v] "+Q" (*(unsigned long *)ptr)				\
+	: [old] "Lr" (old), [new] "r" (new)				\
+	: cl);								\
+									\
+	return oldval;							\
+}									\
+__LL_SC_EXPORT(__cmpxchg_case_##name);
+
+__CMPXCHG_CASE(w, b,    1,        ,         )
+__CMPXCHG_CASE(w, h,    2,        ,         )
+__CMPXCHG_CASE(w,  ,    4,        ,         )
+__CMPXCHG_CASE( ,  ,    8,        ,         )
+__CMPXCHG_CASE(w, b, mb_1, dmb ish, "memory")
+__CMPXCHG_CASE(w, h, mb_2, dmb ish, "memory")
+__CMPXCHG_CASE(w,  , mb_4, dmb ish, "memory")
+__CMPXCHG_CASE( ,  , mb_8, dmb ish, "memory")
+
+#undef __CMPXCHG_CASE
+
 #endif /* __ASM_ATOMIC_LL_SC_H */
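The stringification makes the macro above dense, so here is roughly what __CMPXCHG_CASE(w, b, 1, , ) expands to, written out by hand for illustration (whitespace tidied; the empty mb argument leaves no barrier on either side):

__LL_SC_INLINE unsigned long
__LL_SC_PREFIX(__cmpxchg_case_1(volatile void *ptr,
				unsigned long old,
				unsigned long new))
{
	unsigned long tmp, oldval;

	asm volatile(
	"1:	ldxrb	%w[oldval], %[v]\n"		/* exclusive load of the byte */
	"	eor	%w[tmp], %w[oldval], %w[old]\n"	/* compare against 'old' */
	"	cbnz	%w[tmp], 2f\n"			/* mismatch: return what we saw */
	"	stxrb	%w[tmp], %w[new], %[v]\n"	/* exclusive store of 'new' */
	"	cbnz	%w[tmp], 1b\n"			/* lost exclusivity: retry */
	"	mov	%w[oldval], %w[old]\n"		/* success: result equals 'old' */
	"2:"
	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),
	  [v] "+Q" (*(unsigned long *)ptr)
	: [old] "Lr" (old), [new] "r" (new)
	: );

	return oldval;
}
__LL_SC_EXPORT(__cmpxchg_case_1);

The mb_* variants differ only in sandwiching the sequence between two dmb ish barriers and adding "memory" to the clobber list.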

arch/arm64/include/asm/atomic_lse.h

Lines changed: 39 additions & 0 deletions
@@ -349,4 +349,43 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 #undef __LL_SC_ATOMIC64
 
+#define __LL_SC_CMPXCHG(op)	__LL_SC_CALL(__cmpxchg_case_##op)
+
+#define __CMPXCHG_CASE(w, sz, name, mb, cl...)				\
+static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
+						  unsigned long old,	\
+						  unsigned long new)	\
+{									\
+	register unsigned long x0 asm ("x0") = (unsigned long)ptr;	\
+	register unsigned long x1 asm ("x1") = old;			\
+	register unsigned long x2 asm ("x2") = new;			\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"nop\n"								\
+	__LL_SC_CMPXCHG(name)						\
+	"nop",								\
+	/* LSE atomics */						\
+	"	mov	" #w "30, %" #w "[old]\n"			\
+	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
+	"	mov	%" #w "[ret], " #w "30")			\
+	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
+	: [old] "r" (x1), [new] "r" (x2)				\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
+
+__CMPXCHG_CASE(w, b,    1,   )
+__CMPXCHG_CASE(w, h,    2,   )
+__CMPXCHG_CASE(w,  ,    4,   )
+__CMPXCHG_CASE(x,  ,    8,   )
+__CMPXCHG_CASE(w, b, mb_1, al, "memory")
+__CMPXCHG_CASE(w, h, mb_2, al, "memory")
+__CMPXCHG_CASE(w,  , mb_4, al, "memory")
+__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+
+#undef __LL_SC_CMPXCHG
+#undef __CMPXCHG_CASE
+
 #endif /* __ASM_ATOMIC_LSE_H */
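For a concrete reading of the LSE half, __CMPXCHG_CASE(w, , mb_4, al, "memory") hand-expands to roughly:

	/* on entry: x0 = ptr, x1 = old, x2 = new */
	"	mov	w30, %w[old]\n"		/* cas wants 'old' in its first register */
	"	casal	w30, %w[new], %[v]\n"	/* 32-bit CAS, acquire + release */
	"	mov	%w[ret], w30"		/* cas left the prior memory value there */

cas compares memory against its first register and, pass or fail, writes the value it found back into that register, hence the shuffle through register 30. Using the link register as the scratch is safe here because the LL/SC alternative is a bl to the out-of-line __cmpxchg_case_* function and so clobbers x30 on that path anyway; it is listed in the clobbers for both. The two nops pad the LL/SC side to the same length as the three-instruction LSE sequence, so the alternatives framework can patch one over the other in place.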

arch/arm64/include/asm/cmpxchg.h

Lines changed: 19 additions & 65 deletions
@@ -21,6 +21,7 @@
 #include <linux/bug.h>
 #include <linux/mmdebug.h>
 
+#include <asm/atomic.h>
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
@@ -111,74 +112,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, int size)
 {
-	unsigned long oldval = 0, res;
-
 	switch (size) {
 	case 1:
-		do {
-			asm volatile("// __cmpxchg1\n"
-			"	ldxrb	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxrb	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_1(ptr, old, new);
 	case 2:
-		do {
-			asm volatile("// __cmpxchg2\n"
-			"	ldxrh	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxrh	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_2(ptr, old, new);
 	case 4:
-		do {
-			asm volatile("// __cmpxchg4\n"
-			"	ldxr	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxr	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_4(ptr, old, new);
 	case 8:
-		do {
-			asm volatile("// __cmpxchg8\n"
-			"	ldxr	%1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%1, %3\n"
-			"	b.ne	1f\n"
-			"	stxr	%w0, %4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_8(ptr, old, new);
 	default:
 		BUILD_BUG();
 	}
 
-	return oldval;
+	unreachable();
 }
 
 #define system_has_cmpxchg_double()	1
@@ -229,13 +176,20 @@ static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 					 unsigned long new, int size)
 {
-	unsigned long ret;
-
-	smp_mb();
-	ret = __cmpxchg(ptr, old, new, size);
-	smp_mb();
+	switch (size) {
+	case 1:
+		return __cmpxchg_case_mb_1(ptr, old, new);
+	case 2:
+		return __cmpxchg_case_mb_2(ptr, old, new);
+	case 4:
+		return __cmpxchg_case_mb_4(ptr, old, new);
+	case 8:
+		return __cmpxchg_case_mb_8(ptr, old, new);
+	default:
+		BUILD_BUG();
+	}
 
-	return ret;
+	unreachable();
 }
 
 #define cmpxchg(ptr, o, n)					\