[llvm] r218331 - [Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
Robin Morisset
morisset at google.com
Tue Sep 23 13:46:50 PDT 2014
Author: morisset
Date: Tue Sep 23 15:46:49 2014
New Revision: 218331
URL: http://llvm.org/viewvc/llvm-project?rev=218331&view=rev
Log:
[Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
Summary:
This patch makes use of AtomicExpandPass in Power for inserting fences around
atomic accesses, as part of an effort to remove fence insertion from SelectionDAGBuilder.
As a big bonus, it lets us use sync 1 (lightweight sync, often referred to by the
mnemonic lwsync) instead of sync 0 (heavyweight sync) in many cases.
I also added a test, as there was no test for the barriers emitted by the Power
backend for atomic loads and stores.
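To make the mapping concrete, here is roughly what a release store looks like at the
IR level after expansion (a minimal sketch, assuming the pass emits the target fence
and downgrades the access to monotonic; the function name is invented for the example):

  declare void @llvm.ppc.lwsync()

  define void @store_release(i32* %p, i32 %v) {
    call void @llvm.ppc.lwsync()                     ; leading fence for release
    store atomic i32 %v, i32* %p monotonic, align 4  ; ordering now carried by the fence
    ret void
  }

Selection then only has to handle the monotonic store plus the intrinsic call.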
Test Plan: new test + make check-all
Reviewers: jfb
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D5180
Added:
llvm/trunk/test/CodeGen/PowerPC/atomics.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
Modified: llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td?rev=218331&r1=218330&r2=218331&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td Tue Sep 23 15:46:49 2014
@@ -28,8 +28,10 @@ let TargetPrefix = "ppc" in { // All in
def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
- // sync instruction
+ // sync instruction (i.e. sync 0, a.k.a. hwsync)
def int_ppc_sync : Intrinsic<[], [], []>;
+ // lwsync is sync 1
+ def int_ppc_lwsync : Intrinsic<[], [], []>;
// Intrinsics used to generate ctr-based loops. These should only be
// generated by the PowerPC backend!
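For reference, here is how the two intrinsics look from IR once this lands (a small
sketch; the instructions named in the comments follow the patterns added to
PPCInstrInfo.td further down):

  declare void @llvm.ppc.sync()
  declare void @llvm.ppc.lwsync()

  define void @fences() {
    call void @llvm.ppc.sync()    ; selected as sync 0 (hwsync), or msync on BookE
    call void @llvm.ppc.lwsync()  ; selected as sync 1 (lwsync), or msync on BookE
    ret void
  }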
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=218331&r1=218330&r2=218331&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Sep 23 15:46:49 2014
@@ -6538,6 +6538,38 @@ void PPCTargetLowering::ReplaceNodeResul
// Other Lowering Code
//===----------------------------------------------------------------------===//
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *Func = Intrinsic::getDeclaration(M, Id);
+ return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence are taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (Ord == SequentiallyConsistent)
+ return callIntrinsic(Builder, Intrinsic::ppc_sync);
+ else if (isAtLeastRelease(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ else
+ return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (IsLoad && isAtLeastAcquire(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ // FIXME: this is too conservative, a dependent branch + isync is enough.
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ else
+ return nullptr;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
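Put together, the leading/trailing hooks bracket a seq_cst load like this after
expansion (a sketch; names are invented for the example):

  declare void @llvm.ppc.sync()
  declare void @llvm.ppc.lwsync()

  define i32 @load_seq_cst(i32* %p) {
    call void @llvm.ppc.sync()                   ; leading fence: sync 0 (hwsync)
    %v = load atomic i32* %p monotonic, align 4
    call void @llvm.ppc.lwsync()                 ; trailing fence: sync 1 (lwsync)
    ret i32 %v
  }

This is the sync 0 / sync 1 pair the new test below checks for seq_cst operations.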
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=218331&r1=218330&r2=218331&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Sep 23 15:46:49 2014
@@ -413,6 +413,11 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=218331&r1=218330&r2=218331&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Sep 23 15:46:49 2014
@@ -1704,8 +1704,13 @@ let isCodeGenOnly = 1 in {
}
}
-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+// FIXME: We should use a specific flag to check for the presence of the sync
+// instruction instead of relying on IsBookE.
+// For example, the PPC A2 supports SYNC despite being a BookE processor.
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[IsBookE]>;
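With these patterns, a BookE core should get msync for both intrinsics. Something
along these lines could exercise that (an untested sketch; I am assuming -mcpu=e500mc
selects the BookE patterns):

  ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=e500mc | FileCheck %s
  declare void @llvm.ppc.lwsync()
  define void @f() {
  ; CHECK: msync
    call void @llvm.ppc.lwsync()
    ret void
  }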
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=218331&r1=218330&r2=218331&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Tue Sep 23 15:46:49 2014
@@ -86,6 +86,7 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
@@ -99,6 +100,11 @@ TargetPassConfig *PPCTargetMachine::crea
return new PPCPassConfig(this, PM);
}
+void PPCPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ TargetPassConfig::addIRPasses();
+}
+
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
Added: llvm/trunk/test/CodeGen/PowerPC/atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics.ll?rev=218331&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomics.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/atomics.ll Tue Sep 23 15:46:49 2014
@@ -0,0 +1,125 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
+; FIXME: -verify-machineinstrs currently fails on ppc64 (mismatched register/instruction).
+; This is already checked for in Atomics-64.ll
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
+
+; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
+; because they are implemented in a very messy way with lwarx/stwcx.
+; It should be fixed soon in another patch.
+
+; We first check loads, for all sizes from i8 to i64.
+; We also vary orderings to check for barriers.
+define i8 @load_i8_unordered(i8* %mem) {
+; CHECK-LABEL: load_i8_unordered
+; CHECK-NOT: sync
+ %val = load atomic i8* %mem unordered, align 1
+ ret i8 %val
+}
+define i16 @load_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: load_i16_monotonic
+; CHECK-NOT: sync
+ %val = load atomic i16* %mem monotonic, align 2
+ ret i16 %val
+}
+define i32 @load_i32_acquire(i32* %mem) {
+; CHECK-LABEL: load_i32_acquire
+ %val = load atomic i32* %mem acquire, align 4
+; CHECK: sync 1
+ ret i32 %val
+}
+define i64 @load_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: load_i64_seq_cst
+; CHECK: sync 0
+ %val = load atomic i64* %mem seq_cst, align 8
+; CHECK: sync 1
+ ret i64 %val
+}
+
+; Stores
+define void @store_i8_unordered(i8* %mem) {
+; CHECK-LABEL: store_i8_unordered
+; CHECK-NOT: sync
+ store atomic i8 42, i8* %mem unordered, align 1
+ ret void
+}
+define void @store_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: store_i16_monotonic
+; CHECK-NOT: sync
+ store atomic i16 42, i16* %mem monotonic, align 2
+ ret void
+}
+define void @store_i32_release(i32* %mem) {
+; CHECK-LABEL: store_i32_release
+; CHECK: sync 1
+ store atomic i32 42, i32* %mem release, align 4
+ ret void
+}
+define void @store_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: store_i64_seq_cst
+; CHECK: sync 0
+ store atomic i64 42, i64* %mem seq_cst, align 8
+ ret void
+}
+
+; Atomic CmpXchg
+define i8 @cas_strong_i8_sc_sc(i8* %mem) {
+; CHECK-LABEL: cas_strong_i8_sc_sc
+; CHECK: sync 0
+ %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
+; CHECK: sync 1
+ %loaded = extractvalue { i8, i1} %val, 0
+ ret i8 %loaded
+}
+define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
+; CHECK-LABEL: cas_weak_i16_acquire_acquire
+; CHECK-NOT: sync
+ %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
+; CHECK: sync 1
+ %loaded = extractvalue { i16, i1} %val, 0
+ ret i16 %loaded
+}
+define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
+; CHECK-LABEL: cas_strong_i32_acqrel_acquire
+; CHECK: sync 1
+ %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
+; CHECK: sync 1
+ %loaded = extractvalue { i32, i1} %val, 0
+ ret i32 %loaded
+}
+define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
+; CHECK-LABEL: cas_weak_i64_release_monotonic
+; CHECK: sync 1
+ %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
+; CHECK-NOT: [sync ]
+ %loaded = extractvalue { i64, i1} %val, 0
+ ret i64 %loaded
+}
+
+; AtomicRMW
+define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
+; CHECK-LABEL: add_i8_monotonic
+; CHECK-NOT: sync
+ %val = atomicrmw add i8* %mem, i8 %operand monotonic
+ ret i8 %val
+}
+define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
+; CHECK-LABEL: xor_i16_seq_cst
+; CHECK: sync 0
+ %val = atomicrmw xor i16* %mem, i16 %operand seq_cst
+; CHECK: sync 1
+ ret i16 %val
+}
+define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
+; CHECK-LABEL: xchg_i32_acq_rel
+; CHECK: sync 1
+ %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
+; CHECK: sync 1
+ ret i32 %val
+}
+define i64 @and_i64_release(i64* %mem, i64 %operand) {
+; CHECK-LABEL: and_i64_release
+; CHECK: sync 1
+ %val = atomicrmw and i64* %mem, i64 %operand release
+; CHECK-NOT: [sync ]
+ ret i64 %val
+}