[llvm] r293887 - [LICM] Hoist loads that are dominated by invariant.start intrinsic, and are invariant in the loop.
Anna Thomas via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 2 05:22:04 PST 2017
Author: annat
Date: Thu Feb 2 07:22:03 2017
New Revision: 293887
URL: http://llvm.org/viewvc/llvm-project?rev=293887&view=rev
Log:
[LICM] Hoist loads that are dominated by invariant.start intrinsic, and are invariant in the loop.
Summary:
We can hoist loads that are dominated by invariant.start out to the preheader.
We conservatively assume the load is variant if we see a use of the
corresponding invariant.start (it could be an invariant.end or an escaping
call).
Reviewers: mkuper, sanjoy, reames
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D29331
Modified:
llvm/trunk/lib/Transforms/Scalar/LICM.cpp
llvm/trunk/test/Transforms/LICM/hoisting.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=293887&r1=293886&r2=293887&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Thu Feb 2 07:22:03 2017
@@ -81,6 +81,11 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<uint32_t> MaxNumUsesTraversed(
+ "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+ cl::desc("Max num uses visited for identifying load "
+ "invariance in loop using invariant start (default = 8)"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
@@ -480,6 +485,59 @@ void llvm::computeLoopSafetyInfo(LoopSaf
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
+// Return true if LI is invariant within the scope of the loop. LI is invariant
+// if CurLoop is dominated by an invariant.start on the memory location LI loads
+// from, whose size covers at least the size of the load, and that
+// invariant.start has no uses.
+static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
+ Loop *CurLoop) {
+ Value *Addr = LI->getOperand(0);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
+ cast<PointerType>(Addr->getType())->getElementType());
+
+ // If the type is i8 addrspace(x)*, we know this is the type of the
+ // llvm.invariant.start operand.
+ auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
+ LI->getPointerAddressSpace());
+ unsigned BitcastsVisited = 0;
+ // Look through bitcasts until we reach the i8* type (this is invariant.start
+ // operand type).
+ while (Addr->getType() != PtrInt8Ty) {
+ auto *BC = dyn_cast<BitCastInst>(Addr);
+ // Avoid looking through a high number of bitcasts.
+ if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
+ return false;
+ Addr = BC->getOperand(0);
+ }
+
+ unsigned UsesVisited = 0;
+ // Traverse all uses of the load operand value, to see if invariant.start is
+ // one of the uses, and whether it dominates the load instruction.
+ for (auto *U : Addr->users()) {
+ // Avoid traversing a load operand with a high number of users.
+ if (++UsesVisited > MaxNumUsesTraversed)
+ return false;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ // If there are escaping uses of the invariant.start instruction, the load
+ // may be non-invariant.
+ if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
+ II->hasNUsesOrMore(1))
+ continue;
+ unsigned InvariantSizeInBits =
+ cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+ // Confirm the invariant.start location size contains the load operand size
+ // in bits. Also, the invariant.start should dominate the load, and we
+ // should not hoist the load out of a loop that contains this dominating
+ // invariant.start.
+ if (LocSizeInBits <= InvariantSizeInBits &&
+ DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
+ return true;
+ }
+
+ return false;
+}
+
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
LoopSafetyInfo *SafetyInfo,
@@ -496,6 +554,10 @@ bool llvm::canSinkOrHoistInst(Instructio
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
+ // This checks for an invariant.start dominating the load.
+ if (isLoadInvariantInLoop(LI, DT, CurLoop))
+ return true;
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
Modified: llvm/trunk/test/Transforms/LICM/hoisting.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/hoisting.ll?rev=293887&r1=293886&r2=293887&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LICM/hoisting.ll (original)
+++ llvm/trunk/test/Transforms/LICM/hoisting.ll Thu Feb 2 07:22:03 2017
@@ -149,3 +149,174 @@ latch:
return:
ret i32 %sum
}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
+declare void @escaping.invariant.start({}*) nounwind
+; invariant.start dominates the load, and in this scope, the
+; load is invariant. So, we can hoist the `addrld` load out of the loop.
+define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+
+
+; Same as test above, but the load is no longer invariant (presence of
+; invariant.end). We cannot hoist the addrld out of loop.
+define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence1
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK-NEXT: invariant.end
+; CHECK-NEXT: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; Same as test above, but instead of invariant.end, we have the result of
+; invariant.start escaping through a call. We cannot hoist the load.
+define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence2
+; CHECK-LABEL: entry
+; CHECK-NOT: load
+; CHECK: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ call void @escaping.invariant.start({}* %invst)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; FIXME: invariant.start dominates the load, and in this scope, the
+; load is invariant. So, we can hoist the `addrld` load out of the loop.
+; Consider the load operand addr.i being bitcast before it is passed to
+; invariant.start.
+define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence3
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
+ %gep = bitcast i32* %addr.i to i8 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; We should not hoist the addrld out of the loop.
+define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence4
+; CHECK-LABEL: entry
+; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
+ %gep = bitcast i32* %addr.i to i8 *
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
More information about the llvm-commits
mailing list