<div dir="ltr">Note two things:<br>1. That pre-single-pred may be a thing worth testing. It's just that in that example, the load is always executed, so it's not lengthening any path.<div>2. There is no other way to fix this bug given our current PRE in GVN, and I believe it is theoretically possible it may cause PRE to place computations on paths where they did not exist before (our machinery is hacky enough that it's hard to reason about)</div><div><br></div><div><br></div><div>It is also currently limited to globals safe to speculatively execute, but you should keep an eye on this to make sure there are no performance degradations.</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Sat, Oct 15, 2016 at 2:35 PM, Davide Italiano via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: davide<br>
Date: Sat Oct 15 16:35:23 2016<br>
New Revision: 284311<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=284311&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=284311&view=rev</a><br>
Log:<br>
[GVN/PRE] Hoist global values outside of loops.<br>
<br>
In theory this could be generalized to move anything where<br>
we prove the operands are available, but that would require<br>
rewriting PRE. As NewGVN will hopefully come soon, and we're<br>
trying to rewrite PRE in terms of NewGVN+MemorySSA, it's probably<br>
not worth spending too much time on it. Fix provided by<br>
Daniel Berlin!<br>
<br>
Added:<br>
llvm/trunk/test/Transforms/<wbr>GVN/PRE/hoist-loads.ll<br>
Modified:<br>
llvm/trunk/lib/Transforms/<wbr>Scalar/GVN.cpp<br>
llvm/trunk/test/Transforms/<wbr>GVN/PRE/pre-single-pred.ll<br>
<br>
Modified: llvm/trunk/lib/Transforms/<wbr>Scalar/GVN.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=284311&r1=284310&r2=284311&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/Scalar/GVN.cpp?rev=<wbr>284311&r1=284310&r2=284311&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/<wbr>Scalar/GVN.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/<wbr>Scalar/GVN.cpp Sat Oct 15 16:35:23 2016<br>
@@ -122,69 +122,84 @@ template <> struct DenseMapInfo<GVN::Exp<br>
/// location of the instruction from which it was formed.<br>
struct llvm::gvn::AvailableValue {<br>
enum ValType {<br>
- SimpleVal, // A simple offsetted value that is accessed.<br>
- LoadVal, // A value produced by a load.<br>
- MemIntrin, // A memory intrinsic which is loaded from.<br>
- UndefVal // A UndefValue representing a value from dead block (which<br>
- // is not yet physically removed from the CFG).<br>
+ SimpleVal, // A simple offsetted value that is accessed.<br>
+ LoadVal, // A value produced by a load.<br>
+ MemIntrinVal, // A memory intrinsic which is loaded from.<br>
+ UndefVal, // A UndefValue representing a value from dead block (which<br>
+ // is not yet physically removed from the CFG).<br>
+ CreateLoadVal // A duplicate load can be created higher up in the CFG that<br>
+ // will eliminate this one.<br>
};<br>
<br>
/// V - The value that is live out of the block.<br>
- PointerIntPair<Value *, 2, ValType> Val;<br>
+ std::pair<Value *, ValType> Val;<br>
<br>
/// Offset - The byte offset in Val that is interesting for the load query.<br>
unsigned Offset;<br>
<br>
static AvailableValue get(Value *V, unsigned Offset = 0) {<br>
AvailableValue Res;<br>
- Res.Val.setPointer(V);<br>
- Res.Val.setInt(SimpleVal);<br>
+ Res.Val.first = V;<br>
+ Res.Val.second = SimpleVal;<br>
Res.Offset = Offset;<br>
return Res;<br>
}<br>
<br>
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {<br>
AvailableValue Res;<br>
- Res.Val.setPointer(MI);<br>
- Res.Val.setInt(MemIntrin);<br>
+ Res.Val.first = MI;<br>
+ Res.Val.second = MemIntrinVal;<br>
Res.Offset = Offset;<br>
return Res;<br>
}<br>
<br>
+ static AvailableValue getCreateLoad(LoadInst *LI) {<br>
+ AvailableValue Res;<br>
+ Res.Val.first = LI;<br>
+ Res.Val.second = CreateLoadVal;<br>
+ return Res;<br>
+ }<br>
+<br>
static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) {<br>
AvailableValue Res;<br>
- Res.Val.setPointer(LI);<br>
- Res.Val.setInt(LoadVal);<br>
+ Res.Val.first = LI;<br>
+ Res.Val.second = LoadVal;<br>
Res.Offset = Offset;<br>
return Res;<br>
}<br>
<br>
static AvailableValue getUndef() {<br>
AvailableValue Res;<br>
- Res.Val.setPointer(nullptr);<br>
- Res.Val.setInt(UndefVal);<br>
+ Res.Val.first = nullptr;<br>
+ Res.Val.second = UndefVal;<br>
Res.Offset = 0;<br>
return Res;<br>
}<br>
<br>
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }<br>
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }<br>
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }<br>
- bool isUndefValue() const { return Val.getInt() == UndefVal; }<br>
+ bool isSimpleValue() const { return Val.second == SimpleVal; }<br>
+ bool isCoercedLoadValue() const { return Val.second == LoadVal; }<br>
+ bool isMemIntrinValue() const { return Val.second == MemIntrinVal; }<br>
+ bool isUndefValue() const { return Val.second == UndefVal; }<br>
+ bool isCreateLoadValue() const { return Val.second == CreateLoadVal; }<br>
+<br>
+ LoadInst *getCreateLoadValue() const {<br>
+ assert(isCreateLoadValue() && "Wrong accessor");<br>
+ return cast<LoadInst>(Val.first);<br>
+ }<br>
<br>
Value *getSimpleValue() const {<br>
assert(isSimpleValue() && "Wrong accessor");<br>
- return Val.getPointer();<br>
+ return Val.first;<br>
}<br>
<br>
LoadInst *getCoercedLoadValue() const {<br>
assert(isCoercedLoadValue() && "Wrong accessor");<br>
- return cast<LoadInst>(Val.getPointer(<wbr>));<br>
+ return cast<LoadInst>(Val.first);<br>
}<br>
<br>
MemIntrinsic *getMemIntrinValue() const {<br>
assert(isMemIntrinValue() && "Wrong accessor");<br>
- return cast<MemIntrinsic>(Val.<wbr>getPointer());<br>
+ return cast<MemIntrinsic>(Val.first);<br>
}<br>
<br>
/// Emit code at the specified insertion point to adjust the value defined<br>
@@ -1191,7 +1206,11 @@ Value *AvailableValue::<wbr>MaterializeAdjust<br>
Value *Res;<br>
Type *LoadTy = LI->getType();<br>
const DataLayout &DL = LI->getModule()-><wbr>getDataLayout();<br>
- if (isSimpleValue()) {<br>
+ if (isCreateLoadValue()) {<br>
+ Instruction *I = getCreateLoadValue()->clone();<br>
+ I->insertBefore(InsertPt);<br>
+ Res = I;<br>
+ } else if (isSimpleValue()) {<br>
Res = getSimpleValue();<br>
if (Res->getType() != LoadTy) {<br>
Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);<br>
@@ -1379,7 +1398,7 @@ void GVN::AnalyzeLoadAvailability(<wbr>LoadIn<br>
continue;<br>
}<br>
<br>
- if (!DepInfo.isDef() && !DepInfo.isClobber()) {<br>
+ if (!DepInfo.isDef() && !DepInfo.isClobber() && !DepInfo.isNonFuncLocal()) {<br>
UnavailableBlocks.push_back(<wbr>DepBB);<br>
continue;<br>
}<br>
@@ -1390,12 +1409,25 @@ void GVN::AnalyzeLoadAvailability(<wbr>LoadIn<br>
Value *Address = Deps[i].getAddress();<br>
<br>
AvailableValue AV;<br>
- if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {<br>
+ // TODO: We can use anything where the operands are available, and we should<br>
+ // learn to recreate operands in other blocks if they are available.<br>
+ // Because we don't have the infrastructure in our PRE, we restrict this to<br>
+ // global values, because we know the operands are always available.<br>
+ if (DepInfo.isNonFuncLocal()) {<br>
+ if (isSafeToSpeculativelyExecute(<wbr>LI) &&<br>
+ isa<GlobalValue>(LI-><wbr>getPointerOperand())) {<br>
+ AV = AvailableValue::getCreateLoad(<wbr>LI);<br>
+ ValuesPerBlock.push_back(<wbr>AvailableValueInBlock::get(<br>
+ &LI->getParent()->getParent()-<wbr>>getEntryBlock(), std::move(AV)));<br>
+ } else<br>
+ UnavailableBlocks.push_back(<wbr>DepBB);<br>
+<br>
+ } else if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {<br>
// subtlety: because we know this was a non-local dependency, we know<br>
// it's safe to materialize anywhere between the instruction within<br>
// DepInfo and the end of it's block.<br>
- ValuesPerBlock.push_back(<wbr>AvailableValueInBlock::get(<wbr>DepBB,<br>
- std::move(AV)));<br>
+ ValuesPerBlock.push_back(<br>
+ AvailableValueInBlock::get(<wbr>DepBB, std::move(AV)));<br>
} else {<br>
UnavailableBlocks.push_back(<wbr>DepBB);<br>
}<br>
<br>
Added: llvm/trunk/test/Transforms/<wbr>GVN/PRE/hoist-loads.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/PRE/hoist-loads.ll?rev=284311&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/GVN/PRE/hoist-<wbr>loads.ll?rev=284311&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/Transforms/<wbr>GVN/PRE/hoist-loads.ll (added)<br>
+++ llvm/trunk/test/Transforms/<wbr>GVN/PRE/hoist-loads.ll Sat Oct 15 16:35:23 2016<br>
@@ -0,0 +1,53 @@<br>
+; RUN: opt -gvn %s -S -o - | FileCheck %s<br>
+<br>
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>
+target triple = "x86_64-unknown-linux-gnu"<br>
+<br>
+; Check that the loads of @aaa, @bbb and @ccc are hoisted.<br>
+; CHECK-LABEL: define void @foo<br>
+; CHECK-NEXT: %2 = load i32, i32* @ccc, align 4<br>
+; CHECK-NEXT: %3 = load i32, i32* @bbb, align 4<br>
+; CHECK-NEXT: %4 = load i32, i32* @aaa, align 4<br>
+<br>
+@aaa = local_unnamed_addr global i32 10, align 4<br>
+@bbb = local_unnamed_addr global i32 20, align 4<br>
+@ccc = local_unnamed_addr global i32 30, align 4<br>
+<br>
+define void @foo(i32* nocapture readonly) local_unnamed_addr {<br>
+ br label %2<br>
+<br>
+ %.0 = phi i32* [ %0, %1 ], [ %3, %22 ]<br>
+ %3 = getelementptr inbounds i32, i32* %.0, i64 1<br>
+ %4 = load i32, i32* %.0, align 4<br>
+ %5 = load i32, i32* @ccc, align 4<br>
+ %6 = icmp sgt i32 %4, %5<br>
+ br i1 %6, label %7, label %10<br>
+<br>
+ %8 = load i32, i32* @bbb, align 4<br>
+ %9 = add nsw i32 %8, %4<br>
+ store i32 %9, i32* @bbb, align 4<br>
+ br label %10<br>
+<br>
+ %11 = load i32, i32* @bbb, align 4<br>
+ %12 = icmp sgt i32 %4, %11<br>
+ br i1 %12, label %13, label %16<br>
+<br>
+ %14 = load i32, i32* @aaa, align 4<br>
+ %15 = add nsw i32 %14, %4<br>
+ store i32 %15, i32* @aaa, align 4<br>
+ br label %16<br>
+<br>
+ %17 = load i32, i32* @aaa, align 4<br>
+ %18 = icmp sgt i32 %4, %17<br>
+ br i1 %18, label %19, label %22<br>
+<br>
+ %20 = load i32, i32* @ccc, align 4<br>
+ %21 = add nsw i32 %20, %4<br>
+ store i32 %21, i32* @ccc, align 4<br>
+ br label %22<br>
+<br>
+ %23 = icmp slt i32 %4, 0<br>
+ br i1 %23, label %24, label %2<br>
+<br>
+ ret void<br>
+}<br>
<br>
Modified: llvm/trunk/test/Transforms/<wbr>GVN/PRE/pre-single-pred.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/PRE/pre-single-pred.ll?rev=284311&r1=284310&r2=284311&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/GVN/PRE/pre-single-<wbr>pred.ll?rev=284311&r1=284310&<wbr>r2=284311&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/Transforms/<wbr>GVN/PRE/pre-single-pred.ll (original)<br>
+++ llvm/trunk/test/Transforms/<wbr>GVN/PRE/pre-single-pred.ll Sat Oct 15 16:35:23 2016<br>
@@ -1,16 +1,9 @@<br>
; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s<br>
-; This testcase assumed we'll PRE the load into %for.cond, but we don't actually<br>
-; verify that doing so is safe. If there didn't _happen_ to be a load in<br>
-; %for.end, we would actually be lengthening the execution on some paths, and<br>
-; we were never actually checking that case. Now we actually do perform some<br>
-; conservative checking to make sure we don't make paths longer, but we don't<br>
-; currently get this case, which we got lucky on previously.<br>
-;<br>
-; Now that that faulty assumption is corrected, test that we DON'T incorrectly<br>
-; hoist the load. Doing the right thing for the wrong reasons is still a bug.<br>
<br>
@p = external global i32<br>
define i32 @f(i32 %n) nounwind {<br>
+; CHECK: entry:<br>
+; CHECK-NEXT: %0 = load i32, i32* @p<br>
entry:<br>
br label %for.cond<br>
<br>
@@ -22,8 +15,9 @@ for.cond: ; preds = %for.inc, %entry<br>
for.cond.for.end_crit_edge: ; preds = %for.cond<br>
br label %for.end<br>
<br>
+; The load of @p should be hoisted into the entry block.<br>
; CHECK: for.body:<br>
-; CHECK-NEXT: %tmp3 = load i32, i32* @p<br>
+; CHECK-NEXT: %dec = add i32 %tmp3, -1<br>
for.body: ; preds = %for.cond<br>
%tmp3 = load i32, i32* @p ; <i32> [#uses=1]<br>
%dec = add i32 %tmp3, -1 ; <i32> [#uses=2]<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>