[llvm] 982da7a - [SCEVExpander] Stop hoisting IR when reusing phis
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 17 09:38:39 PDT 2021
Author: Philip Reames
Date: 2021-08-17T09:38:32-07:00
New Revision: 982da7a20c406c5725b5fe20d699a78069e54ffa
URL: https://github.com/llvm/llvm-project/commit/982da7a20c406c5725b5fe20d699a78069e54ffa
DIFF: https://github.com/llvm/llvm-project/commit/982da7a20c406c5725b5fe20d699a78069e54ffa.diff
LOG: [SCEVExpander] Stop hoisting IR when reusing phis
This is a fix for PR43678, and is an alternate patch to D105723.
The basic issue we're running into is that LSR + SCEVExpander are moving the very instruction whose operand we're in the process of expanding. This breaks the subtle and ill-documented invariant which let LSR work. (Full story can be found here: https://reviews.llvm.org/D105723#2878473)
Rather than attempting a fix, this change simply removes the optimization entirely. The code is essentially untested, and removing it has no impact that I can find. This code was added back in 2014 by 1e12f8563d4b7 with a single test which does not appear to actually exercise the hoisting logic.
From a philosophical standpoint, it also seems very strange to have the expander implementing optimizations which should live in a dedicated transform pass.
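To illustrate the failure mode, here is a minimal, hypothetical IR sketch (the names are invented and are not taken from this patch or its test): the reused increment gets hoisted above the point where its newly expanded operand is later inserted, leaving a use before its definition.

  loop:
    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
    ; the reused increment was hoisted up to the stale insertion point ...
    %iv.next = add i32 %iv, %expanded.step
    ; ... but the operand LSR was still in the middle of expanding is only
    ; emitted afterwards, so %iv.next now uses a value defined below it
    ; (invalid SSA):
    %expanded.step = shl i32 %scale, 3
    br i1 %cond, label %loop, label %exit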
Differential Revision: https://reviews.llvm.org/D106178
Added:
llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
Modified:
llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 59bf3a342caa..98d62ab9f7b9 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -489,9 +489,6 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy,
Type *IntTy, bool useSubtract);
- void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi);
-
void fixupInsertPoints(Instruction *I);
/// If required, create LCSSA PHIs for \p Users' operand \p OpIdx. If new
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 3978e1e29825..ca1545b82e03 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1125,22 +1125,6 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
return IncV;
}
-/// Hoist the addrec instruction chain rooted in the loop phi above the
-/// position. This routine assumes that this is possible (has been checked).
-void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi) {
- do {
- if (DT->dominates(InstToHoist, Pos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- fixupInsertPoints(InstToHoist);
- InstToHoist->moveBefore(Pos);
- Pos = InstToHoist;
- InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
- } while (InstToHoist != LoopPhi);
-}
-
/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
@@ -1264,8 +1248,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
continue;
- if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
- continue;
} else {
if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
continue;
@@ -1293,11 +1275,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
if (AddRecPhiMatch) {
- // Potentially, move the increment. We have made sure in
- // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
- if (L == IVIncInsertLoop)
- hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
-
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(AddRecPhiMatch);
diff --git a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
new file mode 100644
index 000000000000..289093f469e3
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; These are regression tests for PR43678.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test checks that LSR does not hoist the increment of %val9 while expanding the other pieces of the
+; formula at their original place in the backedge, which would create invalid SSA form.
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(3)* undef, align 4
+; CHECK-NEXT: [[VAL1:%.*]] = add i32 undef, 12
+; CHECK-NEXT: [[VAL2:%.*]] = trunc i64 undef to i32
+; CHECK-NEXT: [[VAL3:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
+; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef
+; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3
+; CHECK-NEXT: br label [[BB7:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB32:%.*]] ], [ 0, [[BB:%.*]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB32]] ], [ -8, [[BB]] ]
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 8
+; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i32 [[LSR_IV1]], [[TMP5]]
+; CHECK-NEXT: [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 65536
+; CHECK-NEXT: br i1 [[VAL10]], label [[BB12:%.*]], label [[BB11:%.*]]
+; CHECK: bb11:
+; CHECK-NEXT: unreachable
+; CHECK: bb12:
+; CHECK-NEXT: [[VAL14:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL14]], label [[BB17:%.*]], label [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]]
+; CHECK: bb15splitsplitsplitsplitsplitsplit:
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLITSPLIT:%.*]]
+; CHECK: bb12.bb15splitsplitsplitsplitsplit_crit_edge:
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[VAL6]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLITSPLIT]]
+; CHECK: bb15splitsplitsplitsplitsplit:
+; CHECK-NEXT: [[VAL16_PH_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP6]], [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE]] ], [ [[VAL35:%.*]], [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT:%.*]] ]
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLIT:%.*]]
+; CHECK: bb17.bb15splitsplitsplitsplit_crit_edge:
+; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[VAL]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP7]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i32 [[VAL5]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLIT]]
+; CHECK: bb15splitsplitsplitsplit:
+; CHECK-NEXT: [[VAL16_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP13]], [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLITSPLIT]] ]
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT:%.*]]
+; CHECK: bb20.bb15splitsplitsplit_crit_edge:
+; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3
+; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[VAL5]], 3
+; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT]]
+; CHECK: bb15splitsplitsplit:
+; CHECK-NEXT: [[VAL16_PH_PH_PH:%.*]] = phi i32 [ [[TMP20]], [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLIT]] ]
+; CHECK-NEXT: br label [[BB15SPLITSPLIT:%.*]]
+; CHECK: bb23.bb15splitsplit_crit_edge:
+; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[VAL]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP21]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = shl i32 [[VAL5]], 2
+; CHECK-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15SPLITSPLIT]]
+; CHECK: bb15splitsplit:
+; CHECK-NEXT: [[VAL16_PH_PH:%.*]] = phi i32 [ [[TMP27]], [[BB23_BB15SPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH]], [[BB15SPLITSPLITSPLIT]] ]
+; CHECK-NEXT: br label [[BB15SPLIT:%.*]]
+; CHECK: bb26.bb15split_crit_edge:
+; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[VAL]], 5
+; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 5
+; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]]
+; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[VAL5]], 5
+; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15SPLIT]]
+; CHECK: bb15split:
+; CHECK-NEXT: [[VAL16_PH:%.*]] = phi i32 [ [[TMP34]], [[BB26_BB15SPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH]], [[BB15SPLITSPLIT]] ]
+; CHECK-NEXT: br label [[BB15:%.*]]
+; CHECK: bb29.bb15_crit_edge:
+; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[VAL]], 6
+; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 6
+; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP35]], [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = mul i32 [[VAL5]], 6
+; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], [[LSR_IV1]]
+; CHECK-NEXT: br label [[BB15]]
+; CHECK: bb15:
+; CHECK-NEXT: [[VAL16:%.*]] = phi i32 [ [[TMP41]], [[BB29_BB15_CRIT_EDGE:%.*]] ], [ [[VAL16_PH]], [[BB15SPLIT]] ]
+; CHECK-NEXT: call void @widget() [ "deopt"(i32 [[VAL16]], i32 3, i32 [[VAL]]) ]
+; CHECK-NEXT: unreachable
+; CHECK: bb17:
+; CHECK-NEXT: [[VAL19:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL19]], label [[BB20:%.*]], label [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE]]
+; CHECK: bb20:
+; CHECK-NEXT: [[VAL22:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL22]], label [[BB23:%.*]], label [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE]]
+; CHECK: bb23:
+; CHECK-NEXT: [[VAL25:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL25]], label [[BB26:%.*]], label [[BB23_BB15SPLITSPLIT_CRIT_EDGE]]
+; CHECK: bb26:
+; CHECK-NEXT: [[VAL28:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL28]], label [[BB29:%.*]], label [[BB26_BB15SPLIT_CRIT_EDGE]]
+; CHECK: bb29:
+; CHECK-NEXT: [[VAL31:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT: br i1 [[VAL31]], label [[BB32]], label [[BB29_BB15_CRIT_EDGE]]
+; CHECK: bb32:
+; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP4]], [[LSR_IV1]]
+; CHECK-NEXT: [[VAL35]] = add i32 [[TMP42]], [[VAL6]]
+; CHECK-NEXT: br i1 false, label [[BB7]], label [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT]]
+;
+bb:
+ %val = load i32, i32 addrspace(3)* undef, align 4
+ %val1 = add i32 undef, 12
+ %val2 = trunc i64 undef to i32
+ %val3 = mul i32 %val1, %val2
+ %val4 = sub i32 %val, %val3
+ %val5 = ashr i32 undef, undef
+ %val6 = sub i32 %val4, %val5
+ br label %bb7
+
+bb7: ; preds = %bb32, %bb
+ %val8 = phi i64 [ 0, %bb ], [ %val34, %bb32 ]
+ %val9 = phi i32 [ 0, %bb ], [ %val35, %bb32 ]
+ %val10 = icmp ult i64 %val8, 65536
+ br i1 %val10, label %bb12, label %bb11
+
+bb11: ; preds = %bb7
+ unreachable
+
+bb12: ; preds = %bb7
+ %val13 = add i32 %val9, %val6
+ %val14 = icmp slt i32 undef, undef
+ br i1 %val14, label %bb17, label %bb15
+
+bb15: ; preds = %bb32, %bb29, %bb26, %bb23, %bb20, %bb17, %bb12
+ %val16 = phi i32 [ %val35, %bb32 ], [ %val30, %bb29 ], [ %val27, %bb26 ], [ %val24, %bb23 ], [ %val21, %bb20 ], [ %val18, %bb17 ], [ %val13, %bb12 ]
+ call void @widget() [ "deopt"(i32 %val16, i32 3, i32 %val) ]
+ unreachable
+
+bb17: ; preds = %bb12
+ %val18 = add i32 %val13, %val6
+ %val19 = icmp slt i32 undef, undef
+ br i1 %val19, label %bb20, label %bb15
+
+bb20: ; preds = %bb17
+ %val21 = add i32 %val18, %val6
+ %val22 = icmp slt i32 undef, undef
+ br i1 %val22, label %bb23, label %bb15
+
+bb23: ; preds = %bb20
+ %val24 = add i32 %val21, %val6
+ %val25 = icmp slt i32 undef, undef
+ br i1 %val25, label %bb26, label %bb15
+
+bb26: ; preds = %bb23
+ %val27 = add i32 %val24, %val6
+ %val28 = icmp slt i32 undef, undef
+ br i1 %val28, label %bb29, label %bb15
+
+bb29: ; preds = %bb26
+ %val30 = add i32 %val27, %val6
+ %val31 = icmp slt i32 undef, undef
+ br i1 %val31, label %bb32, label %bb15
+
+bb32: ; preds = %bb29
+ %val33 = add i32 %val30, %val6
+ %val34 = add nuw nsw i64 %val8, 8
+ %val35 = add i32 %val33, %val6
+ br i1 false, label %bb7, label %bb15
+}
+
+; This test checks that LSR does not hoist the increment of %val8 while expanding the other pieces of the
+; formula at their original place in the backedge, which would create invalid SSA form.
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[VAL:%.*]] = bitcast i8* null to i32*
+; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[VAL]], align 4
+; CHECK-NEXT: [[VAL2:%.*]] = bitcast i8* null to i32*
+; CHECK-NEXT: [[VAL3:%.*]] = load i32, i32* [[VAL2]], align 4
+; CHECK-NEXT: br label [[BB6:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[VAL5:%.*]] = sext i32 [[VAL16:%.*]] to i64
+; CHECK-NEXT: unreachable
+; CHECK: bb6:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB12:%.*]] ], [ -1, [[BB:%.*]] ]
+; CHECK-NEXT: [[VAL8:%.*]] = phi i32 [ [[VAL16]], [[BB12]] ], [ [[VAL3]], [[BB]] ]
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT: [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 1048576
+; CHECK-NEXT: br i1 [[VAL10]], label [[BB12]], label [[BB11:%.*]]
+; CHECK: bb11:
+; CHECK-NEXT: unreachable
+; CHECK: bb12:
+; CHECK-NEXT: [[VAL14:%.*]] = add i32 [[VAL8]], [[VAL1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[VAL1]], [[VAL8]]
+; CHECK-NEXT: [[VAL15:%.*]] = select i1 false, i32 [[VAL14]], i32 [[VAL8]]
+; CHECK-NEXT: [[VAL16]] = add i32 [[TMP0]], 1
+; CHECK-NEXT: [[VAL17:%.*]] = fcmp olt double 0.000000e+00, 2.270000e+02
+; CHECK-NEXT: br i1 [[VAL17]], label [[BB6]], label [[BB4:%.*]]
+;
+bb:
+ %val = bitcast i8* null to i32*
+ %val1 = load i32, i32* %val, align 4
+ %val2 = bitcast i8* null to i32*
+ %val3 = load i32, i32* %val2, align 4
+ br label %bb6
+
+bb4: ; preds = %bb12
+ %val5 = sext i32 %val16 to i64
+ unreachable
+
+bb6: ; preds = %bb12, %bb
+ %val7 = phi i64 [ %val9, %bb12 ], [ 0, %bb ]
+ %val8 = phi i32 [ %val16, %bb12 ], [ %val3, %bb ]
+ %val9 = add nuw nsw i64 %val7, 1
+ %val10 = icmp ult i64 %val7, 1048576
+ br i1 %val10, label %bb12, label %bb11
+
+bb11: ; preds = %bb6
+ unreachable
+
+bb12: ; preds = %bb6
+ %val13 = select i1 false, i32 0, i32 %val8
+ %val14 = add i32 %val8, %val1
+ %val15 = select i1 false, i32 %val14, i32 %val13
+ %val16 = add i32 %val14, 1
+ %val17 = fcmp olt double 0.000000e+00, 2.270000e+02
+ br i1 %val17, label %bb6, label %bb4
+}
+
+declare void @widget()