[llvm] r218331 - [Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
Hal Finkel
hfinkel at anl.gov
Thu Oct 2 15:46:04 PDT 2014
----- Original Message -----
> From: "Robin Morisset" <morisset at google.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Tuesday, September 23, 2014 3:46:50 PM
> Subject: [llvm] r218331 - [Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
>
> Author: morisset
> Date: Tue Sep 23 15:46:49 2014
> New Revision: 218331
>
> URL: http://llvm.org/viewvc/llvm-project?rev=218331&view=rev
> Log:
> [Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
>
> Summary:
> This patch makes use of AtomicExpandPass in Power for inserting fences around
> atomic operations, as part of an effort to remove fence insertion from
> SelectionDAGBuilder. As a big bonus, it lets us use sync 1 (lightweight sync,
> usually written with the mnemonic lwsync) instead of sync 0 (heavyweight sync)
> in many cases.
>
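The payoff here is easiest to see at the source level. Below is a minimal
C++11 sketch, with the expected PPC instruction sequences as comments; the
sequences follow the cpp0xmappings page cited in the patch, and are
illustrative rather than a claim about this exact revision's output:

  #include <atomic>

  std::atomic<int> X;

  void store_release(int V) {
    // Release store: lwsync (sync 1) before the plain store.
    X.store(V, std::memory_order_release);
  }

  void store_seq_cst(int V) {
    // Seq_cst store: full hwsync (sync 0) before the plain store.
    X.store(V, std::memory_order_seq_cst);
  }

  int load_acquire() {
    // Acquire load: plain load followed by lwsync (sync 1).
    return X.load(std::memory_order_acquire);
  }

Before this patch, the release and acquire cases above would have been
bracketed with the heavyweight sync 0 as well.
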
> I also added a test, as there was no test for the barriers emitted by the
> Power backend for atomic loads and stores.
>
> Test Plan: new test + make check-all
>
> Reviewers: jfb
>
> Subscribers: llvm-commits
>
> Differential Revision: http://reviews.llvm.org/D5180
>
> Added:
> llvm/trunk/test/CodeGen/PowerPC/atomics.ll
> Modified:
> llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
> llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td?rev=218331&r1=218330&r2=218331&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td Tue Sep 23 15:46:49 2014
> @@ -28,8 +28,10 @@ let TargetPrefix = "ppc" in { // All in
> def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
> def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
>
> - // sync instruction
> +  // sync instruction (i.e. sync 0, a.k.a. hwsync)
> def int_ppc_sync : Intrinsic<[], [], []>;
> + // lwsync is sync 1
> + def int_ppc_lwsync : Intrinsic<[], [], []>;
>
> // Intrinsics used to generate ctr-based loops. These should only be
> // generated by the PowerPC backend!
>
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=218331&r1=218330&r2=218331&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Sep 23 15:46:49 2014
> @@ -6538,6 +6538,38 @@ void PPCTargetLowering::ReplaceNodeResul
> // Other Lowering Code
> //===----------------------------------------------------------------------===//
>
> +static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
> + Module *M = Builder.GetInsertBlock()->getParent()->getParent();
> + Function *Func = Intrinsic::getDeclaration(M, Id);
> + return Builder.CreateCall(Func);
> +}
> +
> +// The mappings for emitLeading/TrailingFence are taken from
> +// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
> +Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
> +                                                 AtomicOrdering Ord,
> +                                                 bool IsStore, bool IsLoad) const {
> +  if (Ord == SequentiallyConsistent)
> +    return callIntrinsic(Builder, Intrinsic::ppc_sync);
> +  else if (isAtLeastRelease(Ord))
> +    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
> +  else
> +    return nullptr;
> +}
> +
> +Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
> +                                                  AtomicOrdering Ord,
> +                                                  bool IsStore, bool IsLoad) const {
> +  if (IsLoad && isAtLeastAcquire(Ord))
> +    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
> +  // FIXME: this is too conservative; a dependent branch + isync is enough.
> +  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
> +  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
> +  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
> +  else
> +    return nullptr;
> +}
> +
> MachineBasicBlock *
> PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
>                                     bool is64bit, unsigned BinOpcode) const {
>
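The ordering-to-fence mapping above is small enough to model standalone.
Here is a rough self-contained C++ sketch of the decision logic, for
reference only (the enum and helper names are made up for this example and
are not the backend's internals):

  #include <cstdio>

  enum class Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

  // Mirrors emitLeadingFence: hwsync for seq_cst, lwsync when the
  // ordering is at least release, no leading fence otherwise.
  static const char *leadingFence(Ordering Ord) {
    if (Ord == Ordering::SeqCst)
      return "sync 0";
    if (Ord == Ordering::Release || Ord == Ordering::AcquireRelease)
      return "sync 1";
    return nullptr;
  }

  // Mirrors emitTrailingFence: loads whose ordering is at least acquire
  // get a trailing lwsync; stores get no trailing fence.
  static const char *trailingFence(Ordering Ord, bool IsLoad) {
    bool AtLeastAcquire = Ord == Ordering::Acquire ||
                          Ord == Ordering::AcquireRelease ||
                          Ord == Ordering::SeqCst;
    if (IsLoad && AtLeastAcquire)
      return "sync 1";
    return nullptr;
  }

  int main() {
    const char *L = leadingFence(Ordering::SeqCst);
    const char *T = trailingFence(Ordering::SeqCst, /*IsLoad=*/true);
    std::printf("seq_cst load: leading=%s, trailing=%s\n",
                L ? L : "none", T ? T : "none");
    return 0;
  }

This matches the test expectations further down: a seq_cst load is bracketed
by sync 0 before and sync 1 after, while a monotonic access gets no fence.
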
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=218331&r1=218330&r2=218331&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Sep 23 15:46:49 2014
> @@ -413,6 +413,11 @@ namespace llvm {
>                                          const SelectionDAG &DAG,
>                                          unsigned Depth = 0) const override;
>
> +    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
> +                                  bool IsStore, bool IsLoad) const override;
> +    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
> +                                   bool IsStore, bool IsLoad) const override;
> +
> MachineBasicBlock *
>       EmitInstrWithCustomInserter(MachineInstr *MI,
>                                   MachineBasicBlock *MBB) const override;
>
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=218331&r1=218330&r2=218331&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Sep 23 15:46:49 2014
> @@ -1704,8 +1704,13 @@ let isCodeGenOnly = 1 in {
> }
> }
>
> -def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
> -def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
> +// FIXME: We should use a specific flag to check for the presence of the sync
> +// instruction instead of relying on IsBookE. For example, the PPC A2 supports
> +// SYNC despite being a BookE processor.
Fixed in r218923.
-Hal
> +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
> +def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[IsNotBookE]>;
> +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
> +def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[IsBookE]>;
>
> //===----------------------------------------------------------------------===//
> // PPC32 Arithmetic Instructions.
>
> Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=218331&r1=218330&r2=218331&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Tue Sep 23 15:46:49 2014
> @@ -86,6 +86,7 @@ public:
> return *getPPCTargetMachine().getSubtargetImpl();
> }
>
> + void addIRPasses() override;
> bool addPreISel() override;
> bool addILPOpts() override;
> bool addInstSelector() override;
> @@ -99,6 +100,11 @@ TargetPassConfig *PPCTargetMachine::crea
> return new PPCPassConfig(this, PM);
> }
>
> +void PPCPassConfig::addIRPasses() {
> + addPass(createAtomicExpandPass(&getPPCTargetMachine()));
> + TargetPassConfig::addIRPasses();
> +}
> +
> bool PPCPassConfig::addPreISel() {
> if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
> addPass(createPPCCTRLoops(getPPCTargetMachine()));
>
> Added: llvm/trunk/test/CodeGen/PowerPC/atomics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics.ll?rev=218331&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/atomics.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/atomics.ll Tue Sep 23 15:46:49 2014
> @@ -0,0 +1,125 @@
> +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
> +; FIXME: -verify-machineinstrs currently fails on ppc64 (mismatched register/instruction).
> +; This is already checked for in Atomics-64.ll
> +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
> +
> +; FIXME: we don't currently check for the operations themselves with
> +; CHECK-NEXT, because they are implemented in a very messy way with
> +; lwarx/stwcx. It should be fixed soon in another patch.
> +
> +; We first check loads, for all sizes from i8 to i64.
> +; We also vary orderings to check for barriers.
> +define i8 @load_i8_unordered(i8* %mem) {
> +; CHECK-LABEL: load_i8_unordered
> +; CHECK-NOT: sync
> + %val = load atomic i8* %mem unordered, align 1
> + ret i8 %val
> +}
> +define i16 @load_i16_monotonic(i16* %mem) {
> +; CHECK-LABEL: load_i16_monotonic
> +; CHECK-NOT: sync
> + %val = load atomic i16* %mem monotonic, align 2
> + ret i16 %val
> +}
> +define i32 @load_i32_acquire(i32* %mem) {
> +; CHECK-LABEL: load_i32_acquire
> + %val = load atomic i32* %mem acquire, align 4
> +; CHECK: sync 1
> + ret i32 %val
> +}
> +define i64 @load_i64_seq_cst(i64* %mem) {
> +; CHECK-LABEL: load_i64_seq_cst
> +; CHECK: sync 0
> + %val = load atomic i64* %mem seq_cst, align 8
> +; CHECK: sync 1
> + ret i64 %val
> +}
> +
> +; Stores
> +define void @store_i8_unordered(i8* %mem) {
> +; CHECK-LABEL: store_i8_unordered
> +; CHECK-NOT: sync
> + store atomic i8 42, i8* %mem unordered, align 1
> + ret void
> +}
> +define void @store_i16_monotonic(i16* %mem) {
> +; CHECK-LABEL: store_i16_monotonic
> +; CHECK-NOT: sync
> + store atomic i16 42, i16* %mem monotonic, align 2
> + ret void
> +}
> +define void @store_i32_release(i32* %mem) {
> +; CHECK-LABEL: store_i32_release
> +; CHECK: sync 1
> + store atomic i32 42, i32* %mem release, align 4
> + ret void
> +}
> +define void @store_i64_seq_cst(i64* %mem) {
> +; CHECK-LABEL: store_i64_seq_cst
> +; CHECK: sync 0
> + store atomic i64 42, i64* %mem seq_cst, align 8
> + ret void
> +}
> +
> +; Atomic CmpXchg
> +define i8 @cas_strong_i8_sc_sc(i8* %mem) {
> +; CHECK-LABEL: cas_strong_i8_sc_sc
> +; CHECK: sync 0
> + %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
> +; CHECK: sync 1
> + %loaded = extractvalue { i8, i1} %val, 0
> + ret i8 %loaded
> +}
> +define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
> +; CHECK-LABEL: cas_weak_i16_acquire_acquire
> +; CHECK-NOT: sync
> + %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
> +; CHECK: sync 1
> + %loaded = extractvalue { i16, i1} %val, 0
> + ret i16 %loaded
> +}
> +define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
> +; CHECK-LABEL: cas_strong_i32_acqrel_acquire
> +; CHECK: sync 1
> + %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
> +; CHECK: sync 1
> + %loaded = extractvalue { i32, i1} %val, 0
> + ret i32 %loaded
> +}
> +define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
> +; CHECK-LABEL: cas_weak_i64_release_monotonic
> +; CHECK: sync 1
> + %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
> +; CHECK-NOT: [sync ]
> + %loaded = extractvalue { i64, i1} %val, 0
> + ret i64 %loaded
> +}
> +
> +; AtomicRMW
> +define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
> +; CHECK-LABEL: add_i8_monotonic
> +; CHECK-NOT: sync
> + %val = atomicrmw add i8* %mem, i8 %operand monotonic
> + ret i8 %val
> +}
> +define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
> +; CHECK-LABEL: xor_i16_seq_cst
> +; CHECK: sync 0
> + %val = atomicrmw xor i16* %mem, i16 %operand seq_cst
> +; CHECK: sync 1
> + ret i16 %val
> +}
> +define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
> +; CHECK-LABEL: xchg_i32_acq_rel
> +; CHECK: sync 1
> + %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
> +; CHECK: sync 1
> + ret i32 %val
> +}
> +define i64 @and_i64_release(i64* %mem, i64 %operand) {
> +; CHECK-LABEL: and_i64_release
> +; CHECK: sync 1
> + %val = atomicrmw and i64* %mem, i64 %operand release
> +; CHECK-NOT: [sync ]
> + ret i64 %val
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory