[llvm] 3a00e58 - AArch64: use indivisible cmpxchg for 128-bit atomic loads at O0
Tim Northover via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 22 06:20:46 PDT 2021
Author: Tim Northover
Date: 2021-09-22T14:20:43+01:00
New Revision: 3a00e58c2fca0c20d3792c897ef1ea54b6a168a0
URL: https://github.com/llvm/llvm-project/commit/3a00e58c2fca0c20d3792c897ef1ea54b6a168a0
DIFF: https://github.com/llvm/llvm-project/commit/3a00e58c2fca0c20d3792c897ef1ea54b6a168a0.diff
LOG: AArch64: use indivisible cmpxchg for 128-bit atomic loads at O0
As with normal atomicrmw operations, at -O0 the simple register allocator can
insert spills into the LL/SC loop if the loop is expanded and visible when
regalloc runs. Those spills can repeatedly clear the monitor, so the operation
never succeeds. Instead, expand to a cmpxchg, which has a pseudo-instruction
for -O0.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 186a954e75039..9aee359c57205 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17670,6 +17670,14 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
if (Size != 128 || isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
return AtomicExpansionKind::LLSC;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index ecf197009407d..5b81be232ec03 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -373,60 +373,42 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) {
;
; CHECK-LLSC-O0-LABEL: atomic_load_relaxed:
; CHECK-LLSC-O0: // %bb.0:
-; CHECK-LLSC-O0-NEXT: sub sp, sp, #48
-; CHECK-LLSC-O0-NEXT: .cfi_def_cfa_offset 48
-; CHECK-LLSC-O0-NEXT: str x2, [sp, #32] // 8-byte Folded Spill
-; CHECK-LLSC-O0-NEXT: str x3, [sp, #40] // 8-byte Folded Spill
-; CHECK-LLSC-O0-NEXT: b .LBB4_1
-; CHECK-LLSC-O0-NEXT: .LBB4_1: // %atomicrmw.start
-; CHECK-LLSC-O0-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-LLSC-O0-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
-; CHECK-LLSC-O0-NEXT: ldxp x9, x10, [x11]
-; CHECK-LLSC-O0-NEXT: mov x8, xzr
-; CHECK-LLSC-O0-NEXT: orr x9, x9, x8
-; CHECK-LLSC-O0-NEXT: orr x10, x8, x10
+; CHECK-LLSC-O0-NEXT: mov x11, xzr
+; CHECK-LLSC-O0-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1
+; CHECK-LLSC-O0-NEXT: ldxp x9, x8, [x2]
+; CHECK-LLSC-O0-NEXT: cmp x9, x11
+; CHECK-LLSC-O0-NEXT: cset w10, ne
+; CHECK-LLSC-O0-NEXT: cmp x8, x11
+; CHECK-LLSC-O0-NEXT: cinc w10, w10, ne
+; CHECK-LLSC-O0-NEXT: cbnz w10, .LBB4_3
+; CHECK-LLSC-O0-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1
+; CHECK-LLSC-O0-NEXT: stxp w10, x11, x11, [x2]
+; CHECK-LLSC-O0-NEXT: cbnz w10, .LBB4_1
+; CHECK-LLSC-O0-NEXT: b .LBB4_4
+; CHECK-LLSC-O0-NEXT: .LBB4_3: // in Loop: Header=BB4_1 Depth=1
+; CHECK-LLSC-O0-NEXT: stxp w10, x9, x8, [x2]
+; CHECK-LLSC-O0-NEXT: cbnz w10, .LBB4_1
+; CHECK-LLSC-O0-NEXT: .LBB4_4:
; CHECK-LLSC-O0-NEXT: // implicit-def: $q0
; CHECK-LLSC-O0-NEXT: mov v0.d[0], x9
-; CHECK-LLSC-O0-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-LLSC-O0-NEXT: mov v0.d[1], x10
-; CHECK-LLSC-O0-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-LLSC-O0-NEXT: stxp w8, x9, x10, [x11]
-; CHECK-LLSC-O0-NEXT: cbnz w8, .LBB4_1
-; CHECK-LLSC-O0-NEXT: b .LBB4_2
-; CHECK-LLSC-O0-NEXT: .LBB4_2: // %atomicrmw.end
-; CHECK-LLSC-O0-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-LLSC-O0-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload
-; CHECK-LLSC-O0-NEXT: str q0, [x8]
-; CHECK-LLSC-O0-NEXT: add sp, sp, #48
+; CHECK-LLSC-O0-NEXT: mov v0.d[1], x8
+; CHECK-LLSC-O0-NEXT: str q0, [x3]
; CHECK-LLSC-O0-NEXT: ret
;
; CHECK-CAS-O0-LABEL: atomic_load_relaxed:
; CHECK-CAS-O0: // %bb.0:
-; CHECK-CAS-O0-NEXT: sub sp, sp, #48
-; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 48
-; CHECK-CAS-O0-NEXT: str x2, [sp, #32] // 8-byte Folded Spill
-; CHECK-CAS-O0-NEXT: str x3, [sp, #40] // 8-byte Folded Spill
-; CHECK-CAS-O0-NEXT: b .LBB4_1
-; CHECK-CAS-O0-NEXT: .LBB4_1: // %atomicrmw.start
-; CHECK-CAS-O0-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-CAS-O0-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
-; CHECK-CAS-O0-NEXT: ldxp x9, x10, [x11]
; CHECK-CAS-O0-NEXT: mov x8, xzr
-; CHECK-CAS-O0-NEXT: orr x9, x9, x8
-; CHECK-CAS-O0-NEXT: orr x10, x8, x10
+; CHECK-CAS-O0-NEXT: mov x0, x8
+; CHECK-CAS-O0-NEXT: mov x1, x8
+; CHECK-CAS-O0-NEXT: mov x4, x8
+; CHECK-CAS-O0-NEXT: mov x5, x8
+; CHECK-CAS-O0-NEXT: casp x0, x1, x4, x5, [x2]
+; CHECK-CAS-O0-NEXT: mov x9, x0
+; CHECK-CAS-O0-NEXT: mov x8, x1
; CHECK-CAS-O0-NEXT: // implicit-def: $q0
; CHECK-CAS-O0-NEXT: mov v0.d[0], x9
-; CHECK-CAS-O0-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-CAS-O0-NEXT: mov v0.d[1], x10
-; CHECK-CAS-O0-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-CAS-O0-NEXT: stxp w8, x9, x10, [x11]
-; CHECK-CAS-O0-NEXT: cbnz w8, .LBB4_1
-; CHECK-CAS-O0-NEXT: b .LBB4_2
-; CHECK-CAS-O0-NEXT: .LBB4_2: // %atomicrmw.end
-; CHECK-CAS-O0-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-CAS-O0-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload
-; CHECK-CAS-O0-NEXT: str q0, [x8]
-; CHECK-CAS-O0-NEXT: add sp, sp, #48
+; CHECK-CAS-O0-NEXT: mov v0.d[1], x8
+; CHECK-CAS-O0-NEXT: str q0, [x3]
; CHECK-CAS-O0-NEXT: ret
%r = load atomic i128, i128* %p monotonic, align 16
More information about the llvm-commits
mailing list