[llvm] [LSR] Do not create duplicated PHI nodes while preserving LCSSA form (PR #107380)
Sergey Kachkov via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 07:20:18 PDT 2024
https://github.com/skachkov-sc updated https://github.com/llvm/llvm-project/pull/107380
>From 8b768353ebe8c3303eb56d799beab98d7aa02010 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Fri, 30 Aug 2024 16:00:42 +0300
Subject: [PATCH 1/2] [LSR][NFC] Add pre-commit test
---
.../LoopStrengthReduce/preserve-lcssa.ll | 115 ++++++++++++++++++
1 file changed, 115 insertions(+)
diff --git a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
index 0add19e286f583..883cd134952379 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
@@ -89,3 +89,118 @@ loop_exit_7: ; preds = %be_6, %loop_4
%val_i32_24.lcssa = phi i32 [ %val_i32_24, %be_6 ], [ %val_i32_24, %loop_4 ]
br label %bb_5
}
+
+define i64 @test_duplicated_phis(i64 noundef %N) {
+; LEGACYPM-LABEL: define i64 @test_duplicated_phis
+; LEGACYPM-SAME: (i64 noundef [[N:%.*]]) {
+; LEGACYPM-NEXT: entry:
+; LEGACYPM-NEXT: [[MUL:%.*]] = shl i64 [[N]], 1
+; LEGACYPM-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0
+; LEGACYPM-NEXT: br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; LEGACYPM: for.body.preheader:
+; LEGACYPM-NEXT: [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4
+; LEGACYPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; LEGACYPM: for.body.preheader.new:
+; LEGACYPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
+; LEGACYPM-NEXT: br label [[FOR_BODY:%.*]]
+; LEGACYPM: for.body:
+; LEGACYPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -1, [[FOR_BODY_PREHEADER_NEW]] ]
+; LEGACYPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; LEGACYPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4
+; LEGACYPM-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
+; LEGACYPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
+; LEGACYPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; LEGACYPM: for.end.loopexit.unr-lcssa.loopexit:
+; LEGACYPM-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ]
+; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], 1
+; LEGACYPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; LEGACYPM: for.end.loopexit.unr-lcssa:
+; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP1]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
+; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP2]], 0
+; LEGACYPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
+; LEGACYPM-NEXT: br label [[FOR_END]]
+; LEGACYPM: for.end:
+; LEGACYPM-NEXT: [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; LEGACYPM-NEXT: ret i64 [[RES_0_LCSSA]]
+;
+; NEWPM-LABEL: define i64 @test_duplicated_phis
+; NEWPM-SAME: (i64 noundef [[N:%.*]]) {
+; NEWPM-NEXT: entry:
+; NEWPM-NEXT: [[MUL:%.*]] = shl i64 [[N]], 1
+; NEWPM-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0
+; NEWPM-NEXT: br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; NEWPM: for.body.preheader:
+; NEWPM-NEXT: [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4
+; NEWPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; NEWPM: for.body.preheader.new:
+; NEWPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
+; NEWPM-NEXT: br label [[FOR_BODY:%.*]]
+; NEWPM: for.body:
+; NEWPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 3, [[FOR_BODY_PREHEADER_NEW]] ]
+; NEWPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; NEWPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4
+; NEWPM-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -2
+; NEWPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
+; NEWPM-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], -3
+; NEWPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; NEWPM: for.end.loopexit.unr-lcssa.loopexit:
+; NEWPM-NEXT: [[REASS_SUB_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ]
+; NEWPM-NEXT: [[RES_1_3_LCSSA:%.*]] = phi i64 [ [[TMP1]], [[FOR_BODY]] ]
+; NEWPM-NEXT: [[TMP2:%.*]] = add i64 [[REASS_SUB_LCSSA]], -4
+; NEWPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; NEWPM: for.end.loopexit.unr-lcssa:
+; NEWPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[RES_1_3_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; NEWPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; NEWPM-NEXT: [[TMP3:%.*]] = and i64 [[N]], 1
+; NEWPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP3]], 0
+; NEWPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
+; NEWPM-NEXT: br label [[FOR_END]]
+; NEWPM: for.end:
+; NEWPM-NEXT: [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; NEWPM-NEXT: ret i64 [[RES_0_LCSSA]]
+;
+entry:
+ %mul = shl i64 %N, 1
+ %cmp6.not = icmp eq i64 %mul, 0
+ br i1 %cmp6.not, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ %0 = icmp ult i64 %mul, 4
+ br i1 %0, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new
+
+for.body.preheader.new:
+ %unroll_iter = and i64 %mul, -4
+ br label %for.body
+
+for.body:
+ %res.09 = phi i64 [ 0, %for.body.preheader.new ], [ %res.1.3, %for.body ]
+ %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
+ %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.3, %for.body ]
+ %res.1.1 = add i64 %res.09, -1
+ %inc.1 = or disjoint i64 %i.07, 2
+ %res.1.2 = add i64 %inc.1, %res.1.1
+ %reass.sub = sub i64 %res.1.2, %i.07
+ %res.1.3 = add i64 %reass.sub, -3
+ %inc.3 = add nuw i64 %i.07, 4
+ %niter.next.3 = add i64 %niter, 4
+ %niter.ncmp.3.not = icmp eq i64 %niter.next.3, %unroll_iter
+ br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body
+
+for.end.loopexit.unr-lcssa.loopexit:
+ %1 = add i64 %reass.sub, -4
+ br label %for.end.loopexit.unr-lcssa
+
+for.end.loopexit.unr-lcssa:
+ %res.1.lcssa.ph = phi i64 [ undef, %for.body.preheader ], [ %res.1.3, %for.end.loopexit.unr-lcssa.loopexit ]
+ %res.09.unr = phi i64 [ -1, %for.body.preheader ], [ %1, %for.end.loopexit.unr-lcssa.loopexit ]
+ %2 = and i64 %N, 1
+ %lcmp.mod.not = icmp eq i64 %2, 0
+ %spec.select = select i1 %lcmp.mod.not, i64 %res.1.lcssa.ph, i64 %res.09.unr
+ br label %for.end
+
+for.end:
+ %res.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.end.loopexit.unr-lcssa ]
+ ret i64 %res.0.lcssa
+}
>From bf92ef525b9697480958c155734ad6c87d279d2c Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Wed, 4 Sep 2024 17:42:03 +0300
Subject: [PATCH 2/2] [LSR] Do not create duplicated PHI nodes while preserving
LCSSA form
---
.../Transforms/Scalar/LoopStrengthReduce.cpp | 31 ++++++++++---------
.../2011-10-03-CritEdgeMerge.ll | 19 ++++++------
.../AMDGPU/lsr-invalid-ptr-extend.ll | 2 +-
.../X86/2011-11-29-postincphi.ll | 11 ++++---
.../X86/expander-crashes.ll | 2 +-
.../X86/missing-phi-operand-update.ll | 25 ++++++++-------
.../LoopStrengthReduce/preserve-lcssa.ll | 17 +++++-----
7 files changed, 55 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3ca3818938fd26..f966ccaa838422 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2186,6 +2186,12 @@ class LSRInstance {
/// Induction variables that were generated and inserted by the SCEV Expander.
SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
+ // Inserting instructions in the loop and using them as PHI's input could
+ // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
+ // corresponding incoming block is not loop exiting). So collect all such
+ // instructions to form LCSSA for them later.
+ SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
+
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2276,9 +2282,9 @@ class LSRInstance {
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
@@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(
- PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
+void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
+ const LSRFixup &LF, const Formula &F,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
DenseMap<BasicBlock *, Value *> Inserted;
- // Inserting instructions in the loop and using them as PHI's input could
- // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
- // corresponding incoming block is not loop exiting). So collect all such
- // instructions to form LCSSA for them later.
- SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
-
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
bool needUpdateFixups = false;
@@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(
// the inserted value.
if (auto *I = dyn_cast<Instruction>(FullV))
if (L->contains(I) && !L->contains(BB))
- InsertedNonLCSSAInsts.push_back(I);
+ InsertedNonLCSSAInsts.insert(I);
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
@@ -5983,8 +5983,6 @@ void LSRInstance::RewriteForPHI(
}
}
}
-
- formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
}
/// Emit instructions for the leading candidate expression for this LSRUse (this
@@ -5992,7 +5990,7 @@ void LSRInstance::RewriteForPHI(
/// expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution(
Changed = true;
}
+ auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
+ formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
+
for (const IVChain &Chain : IVChainVec) {
GenerateIVChain(Chain, DeadInsts);
Changed = true;
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
index bf52c968ad8708..7195d4cab96f47 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
@@ -24,15 +24,15 @@ define ptr @test1() {
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
-; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
-; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
+; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
+; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbA.bb89_crit_edge:
; CHECK-NEXT: br label [[BB89:%.*]]
; CHECK: bbB:
; CHECK-NEXT: switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [
-; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
-; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
+; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
+; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbB.bb89_crit_edge:
; CHECK-NEXT: br label [[BB89]]
@@ -85,23 +85,22 @@ define ptr @test2() {
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
-; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [
-; CHECK-NEXT: i32 47, label [[BB89]]
-; CHECK-NEXT: i32 58, label [[BB89]]
+; CHECK-NEXT: i32 47, label [[BB89]]
+; CHECK-NEXT: i32 58, label [[BB89]]
; CHECK-NEXT: ]
; CHECK: bbB:
; CHECK-NEXT: switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [
-; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
-; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
+; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
+; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbB.exit_crit_edge:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: bb89:
-; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
+; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
index b4fb4fe7aaf969..737a590394e5ff 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
@@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: loopexit:
-; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
; CHECK: for.body.1:
; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index fbb9e2a7b6b828..841836c7d2dd86 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: leaq 1(%rcx), %rbx
+; CHECK-NEXT: incq %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: .LBB0_3: # %bb8
-; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: # %bb.3: # %bb8split
+; CHECK-NEXT: decq %rbx
+; CHECK-NEXT: .LBB0_4: # %bb8
+; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
index 29c03b88c5fb1a..d652e5c5aa0601 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
@@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
; CHECK: loop.2.ph:
-; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ]
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
; CHECK: loop.2.header:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
index b13503543d6ee7..ae24da06415cce 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
@@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_32:%.*]]
; CHECK: loop.exit.loopexitsplitsplitsplit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
-; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
+; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
; CHECK: loop.exit.loopexitsplitsplit:
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
-; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
+; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
; CHECK: loop.exit.loopexitsplit:
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: then.34.loop.exit.loopexit_crit_edge:
-; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
+; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
; CHECK: loop.exit.loopexit:
@@ -48,23 +49,23 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
; CHECK: loop.32:
-; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
-; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
+; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
; CHECK: then.34:
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
; CHECK: ifmerge.34:
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
@@ -72,7 +73,7 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
; CHECK: ifmerge.38:
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
@@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
; CHECK: ifmerge.42:
-; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
@@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
; CHECK: ifmerge.46:
; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
; CHECK: loop.25:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
index 883cd134952379..376831faa99fbd 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
@@ -102,23 +102,24 @@ define i64 @test_duplicated_phis(i64 noundef %N) {
; LEGACYPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; LEGACYPM: for.body.preheader.new:
; LEGACYPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
+; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[UNROLL_ITER]], -4
+; LEGACYPM-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
+; LEGACYPM-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
+; LEGACYPM-NEXT: [[TMP4:%.*]] = sub i64 -3, [[TMP3]]
; LEGACYPM-NEXT: br label [[FOR_BODY:%.*]]
; LEGACYPM: for.body:
-; LEGACYPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -1, [[FOR_BODY_PREHEADER_NEW]] ]
; LEGACYPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
; LEGACYPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4
-; LEGACYPM-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; LEGACYPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
; LEGACYPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
; LEGACYPM: for.end.loopexit.unr-lcssa.loopexit:
-; LEGACYPM-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ]
-; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], 1
+; LEGACYPM-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
; LEGACYPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
; LEGACYPM: for.end.loopexit.unr-lcssa:
-; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP1]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; LEGACYPM-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
-; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP2]], 0
+; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP4]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT: [[TMP6:%.*]] = and i64 [[N]], 1
+; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP6]], 0
; LEGACYPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
; LEGACYPM-NEXT: br label [[FOR_END]]
; LEGACYPM: for.end:
More information about the llvm-commits mailing list