[llvm] r248291 - [AArch64] Emit clrex in the expanded cmpxchg fail block.
Ahmed Bougacha via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 10:21:45 PDT 2015
Author: ab
Date: Tue Sep 22 12:21:44 2015
New Revision: 248291
URL: http://llvm.org/viewvc/llvm-project?rev=248291&view=rev
Log:
[AArch64] Emit clrex in the expanded cmpxchg fail block.
In the comparison failure block of a cmpxchg expansion, the initial
ldrex/ldxr will not be followed by a matching strex/stxr.
On ARM/AArch64, this unnecessarily ties up the execution monitor,
which might have a negative performance impact on some uarchs.
Instead, release the monitor in the failure block.
The clrex instruction was designed for this: use it.
Also see ARMARM v8-A B2.10.2:
"Exclusive access instructions and Shareable memory locations".
Differential Revision: http://reviews.llvm.org/D13033
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll
llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Sep 22 12:21:44 2015
@@ -1097,6 +1097,14 @@ public:
}
/// @}
+ // Emits code that executes when the comparison result in the ll/sc
+ // expansion of a cmpxchg instruction is such that the store-conditional will
+ // not execute. This makes it possible to balance out the load-linked with
+ // a dedicated instruction, if desired.
+ // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
+ // be unnecessarily held, except if clrex, inserted by this hook, is executed.
+ virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
+
/// Returns true if the given (atomic) store should be expanded by the
/// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Modified: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp Tue Sep 22 12:21:44 2015
@@ -374,7 +374,7 @@ bool AtomicExpand::expandAtomicCmpXchg(A
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
+ // label %cmpxchg.nostore
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
@@ -382,6 +382,9 @@ bool AtomicExpand::expandAtomicCmpXchg(A
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
@@ -392,7 +395,8 @@ bool AtomicExpand::expandAtomicCmpXchg(A
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
@@ -416,7 +420,7 @@ bool AtomicExpand::expandAtomicCmpXchg(A
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
@@ -432,6 +436,13 @@ bool AtomicExpand::expandAtomicCmpXchg(A
/*IsLoad=*/true);
Builder.CreateBr(ExitBB);
+ Builder.SetInsertPoint(NoStoreBB);
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
Builder.SetInsertPoint(FailureBB);
TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
/*IsLoad=*/true);
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Sep 22 12:21:44 2015
@@ -9682,6 +9682,13 @@ Value *AArch64TargetLowering::emitLoadLi
cast<PointerType>(Addr->getType())->getElementType());
}
+void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+}
+
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Tue Sep 22 12:21:44 2015
@@ -348,6 +348,8 @@ public:
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll Tue Sep 22 12:21:44 2015
@@ -2,13 +2,17 @@
define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap:
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -17,13 +21,16 @@ define i32 @val_compare_and_swap(i32* %p
define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
; CHECK-LABEL: val_compare_and_swap_from_load:
; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%new = load i32, i32* %pnew
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
@@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_from_lo
define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_rel:
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -47,13 +58,16 @@ define i32 @val_compare_and_swap_rel(i32
define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_64:
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]
; CHECK-NEXT: cmp [[RESULT]], x1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
%val = extractvalue { i64, i1 } %pair, 0
ret i64 %val
@@ -61,13 +75,13 @@ define i64 @val_compare_and_swap_64(i64*
define i32 @fetch_and_nand(i32* %p) #0 {
; CHECK-LABEL: fetch_and_nand:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]]
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK: mov x0, x[[DEST_REG]]
%val = atomicrmw nand i32* %p, i32 7 release
ret i32 %val
@@ -76,12 +90,12 @@ define i32 @fetch_and_nand(i32* %p) #0 {
define i64 @fetch_and_nand_64(i64* %p) #0 {
; CHECK-LABEL: fetch_and_nand_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]]
; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
%val = atomicrmw nand i64* %p, i64 7 acq_rel
ret i64 %val
@@ -90,12 +104,12 @@ define i64 @fetch_and_nand_64(i64* %p) #
define i32 @fetch_and_or(i32* %p) #0 {
; CHECK-LABEL: fetch_and_or:
; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK: mov x0, x[[DEST_REG]]
%val = atomicrmw or i32* %p, i32 5 seq_cst
ret i32 %val
@@ -104,11 +118,11 @@ define i32 @fetch_and_or(i32* %p) #0 {
define i64 @fetch_and_or_64(i64* %p) #0 {
; CHECK: fetch_and_or_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
%val = atomicrmw or i64* %p, i64 7 monotonic
ret i64 %val
}
Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll?rev=248291&r1=248290&r2=248291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll Tue Sep 22 12:21:44 2015
@@ -893,6 +893,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wa
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
@@ -916,6 +918,8 @@ define i16 @test_atomic_cmpxchg_i16(i16
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
@@ -927,21 +931,21 @@ define i32 @test_atomic_cmpxchg_i32(i32
%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
%old = extractvalue { i32, i1 } %pair, 0
+; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
- ; w0 below is a reasonable guess but could change: it certainly comes into the
- ; function there.
-; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]]
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
-
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -963,6 +967,8 @@ define void @test_atomic_cmpxchg_i64(i64
; As above, w1 is a reasonable guess.
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: str x[[OLD]],
More information about the llvm-commits
mailing list