[llvm] aecdf15 - [ARM] Do not emit ldrexd/strexd on Cortex-M chips
Ayke van Laethem via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 4 12:56:11 PST 2021
Author: Ayke van Laethem
Date: 2021-02-04T21:55:34+01:00
New Revision: aecdf15cc7f866180dc769265b8183cad34bb33a
URL: https://github.com/llvm/llvm-project/commit/aecdf15cc7f866180dc769265b8183cad34bb33a
DIFF: https://github.com/llvm/llvm-project/commit/aecdf15cc7f866180dc769265b8183cad34bb33a.diff
LOG: [ARM] Do not emit ldrexd/strexd on Cortex-M chips
The ldrexd/strexd instructions are not supported on M-class chips; see,
for example,
https://developer.arm.com/documentation/dui0489/e/arm-and-thumb-instructions/memory-access-instructions/ldrex-and-strex
which says:
> All these 32-bit Thumb instructions are available in ARMv6T2 and
> above, except that LDREXD and STREXD are not available in the ARMv7-M
> architecture.
These instructions do not appear to be supported on the ARMv8-M
architecture either. Its Architecture Reference Manual lists
ldrex/strex but not ldrexd/strexd:
https://developer.arm.com/documentation/ddi0553/bn/
Godbolt example showing LLVM 11.0.0 incorrectly emitting ldrexd/strexd
instructions: https://llvm.godbolt.org/z/5qqPnE
Differential Revision: https://reviews.llvm.org/D95891
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/atomic-64bit.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 33f5ce47e8dc..280f76bf0665 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18752,6 +18752,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
+// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
+// bits, and up to 64 bits on the non-M profiles.
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18759,9 +18761,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+ unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+ Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
return AtomicExpansionKind::None;
}
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index 8841483c97a4..eadefcd23bc6 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -2,6 +2,8 @@
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
+; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefix=CHECK-M
+; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M
define i64 @test1(i64* %ptr, i64 %val) {
; CHECK-LABEL: test1:
@@ -28,6 +30,8 @@ define i64 @test1(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_add_8
+
%r = atomicrmw add i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -57,6 +61,8 @@ define i64 @test2(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_sub_8
+
%r = atomicrmw sub i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -86,6 +92,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_and_8
+
%r = atomicrmw and i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -115,6 +123,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_or_8
+
%r = atomicrmw or i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -144,6 +154,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_xor_8
+
%r = atomicrmw xor i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -165,6 +177,8 @@ define i64 @test6(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_lock_test_and_set_8
+
%r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -199,12 +213,15 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-THUMB: beq
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_val_compare_and_swap_8
+
%pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
%r = extractvalue { i64, i1 } %pair, 0
ret i64 %r
}
-; Compiles down to a single ldrexd
+; Compiles down to a single ldrexd, except on M class devices where ldrexd
+; isn't supported.
define i64 @test8(i64* %ptr) {
; CHECK-LABEL: test8:
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
@@ -220,12 +237,15 @@ define i64 @test8(i64* %ptr) {
; CHECK-THUMB-NOT: strexd
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_val_compare_and_swap_8
+
%r = load atomic i64, i64* %ptr seq_cst, align 8
ret i64 %r
}
; Compiles down to atomicrmw xchg; there really isn't any more efficient
-; way to write it.
+; way to write it. Except on M class devices, where ldrexd/strexd aren't
+; supported.
define void @test9(i64* %ptr, i64 %val) {
; CHECK-LABEL: test9:
; CHECK: dmb {{ish$}}
@@ -243,6 +263,8 @@ define void @test9(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_lock_test_and_set_8
+
store atomic i64 %val, i64* %ptr seq_cst, align 8
ret void
}
@@ -286,6 +308,8 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_min_8
+
%r = atomicrmw min i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -329,6 +353,8 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_umin_8
+
%r = atomicrmw umin i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -372,6 +398,8 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+; CHECK-M: __sync_fetch_and_max_8
+
%r = atomicrmw max i64* %ptr, i64 %val seq_cst
ret i64 %r
}
@@ -414,6 +442,9 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
+
+; CHECK-M: __sync_fetch_and_umax_8
+
%r = atomicrmw umax i64* %ptr, i64 %val seq_cst
ret i64 %r
}
More information about the llvm-commits
mailing list