[llvm] r294410 - LSR: Check atomic instruction pointer operands
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 7 22:44:58 PST 2017
Author: arsenm
Date: Wed Feb 8 00:44:58 2017
New Revision: 294410
URL: http://llvm.org/viewvc/llvm-project?rev=294410&view=rev
Log:
LSR: Check atomic instruction pointer operands
Added:
llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=294410&r1=294409&r2=294410&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Wed Feb 8 00:44:58 2017
@@ -713,7 +713,7 @@ static GlobalValue *ExtractSymbol(const
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(1) == OperandVal)
+ if (SI->getPointerOperand() == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
@@ -725,6 +725,12 @@ static bool isAddressUse(Instruction *In
isAddress = true;
break;
}
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ if (RMW->getPointerOperand() == OperandVal)
+ isAddress = true;
+ } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ if (CmpX->getPointerOperand() == OperandVal)
+ isAddress = true;
}
return isAddress;
}
@@ -737,6 +743,10 @@ static MemAccessTy getAccessType(const I
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ AccessTy.AddrSpace = RMW->getPointerAddressSpace();
+ } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
}
// All pointers have the same requirements, so canonicalize them to an
Added: llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll?rev=294410&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll Wed Feb 8 00:44:58 2017
@@ -0,0 +1,87 @@
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; Make sure the pointer operand and address space of atomicrmw and cmpxchg are considered.
+
+; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(
+
+; OPT-NOT: getelementptr
+
+; OPT: .lr.ph:
+; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
+; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %scevgep4, i32 undef seq_cst
+; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst
+; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst
+; OPT: br i1 %exitcond
+define void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i32 %indvars.iv, 16383
+ %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
+ %tmp4 = atomicrmw add i32 addrspace(3)* %tmp3, i32 undef seq_cst
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
+ %tmp7 = atomicrmw add i32 addrspace(3)* %tmp6, i32 undef seq_cst
+ %tmp8 = add nsw i32 %tmp7, %tmp4
+ atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(
+; OPT-NOT: getelementptr
+
+; OPT: .lr.ph:
+; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
+; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %scevgep4, i32 undef, i32 undef seq_cst monotonic
+define void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i32 %indvars.iv, 16383
+ %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
+ %tmp4 = cmpxchg i32 addrspace(3)* %tmp3, i32 undef, i32 undef seq_cst monotonic
+ %tmp4.0 = extractvalue { i32, i1 } %tmp4, 0
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
+ %tmp7 = cmpxchg i32 addrspace(3)* %tmp6, i32 undef, i32 undef seq_cst monotonic
+ %tmp7.0 = extractvalue { i32, i1 } %tmp7, 0
+ %tmp8 = add nsw i32 %tmp7.0, %tmp4.0
+ atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+attributes #0 = { nounwind }
\ No newline at end of file
More information about the llvm-commits mailing list