[llvm] 2cb4d1b - [LSR] Do not create duplicated PHI nodes while preserving LCSSA form (#107380)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 08:39:51 PDT 2024


Author: Sergey Kachkov
Date: 2024-09-06T18:39:47+03:00
New Revision: 2cb4d1b1bd7bde2724b79976e859684bd3f5c771

URL: https://github.com/llvm/llvm-project/commit/2cb4d1b1bd7bde2724b79976e859684bd3f5c771
DIFF: https://github.com/llvm/llvm-project/commit/2cb4d1b1bd7bde2724b79976e859684bd3f5c771.diff

LOG: [LSR] Do not create duplicated PHI nodes while preserving LCSSA form (#107380)

Motivating example: https://godbolt.org/z/eb97zrxhx
Here we have 2 induction variables in the loop: one is corresponding to
i variable (add rdx, 4), the other - to res (add rax, 2). The second
induction variable can be removed by rewriteLoopExitValues() method
(final value of res at loop exit is unroll_iter * -2); however, this
doesn't happen because we have duplicated LCSSA phi nodes at loop exit:
```
; Preheader:
for.body.preheader.new:                           ; preds = %for.body.preheader
  %unroll_iter = and i64 %N, -4
  br label %for.body

; Loop:
for.body:                                         ; preds = %for.body, %for.body.preheader.new
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.preheader.new ]
  %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
  %inc.3 = add nuw i64 %i.07, 4
  %lsr.iv.next = add nsw i64 %lsr.iv, -2
  %niter.ncmp.3.not = icmp eq i64 %unroll_iter, %inc.3
  br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop !7

; Exit blocks
for.end.loopexit.unr-lcssa.loopexit:              ; preds = %for.body
  %inc.3.lcssa = phi i64 [ %inc.3, %for.body ]
  %lsr.iv.next.lcssa11 = phi i64 [ %lsr.iv.next, %for.body ]
  %lsr.iv.next.lcssa = phi i64 [ %lsr.iv.next, %for.body ]
  br label %for.end.loopexit.unr-lcssa
```
rewriteLoopExitValues requires %lsr.iv.next value to have only 2 uses:
one in LCSSA phi node, the other - in induction phi node. Here we have 3
uses of this value because of duplicated lcssa nodes, so the transform
doesn't apply and leads to an extra add operation inside the loop. The
proposed solution is to accumulate inserted instructions that will
require LCSSA form update into SetVector and then call
formLCSSAForInstructions for this SetVector once, so the same
instructions don't process twice.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
    llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
    llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
    llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
    llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
    llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3ca3818938fd26..f966ccaa838422 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2186,6 +2186,12 @@ class LSRInstance {
   /// Induction variables that were generated and inserted by the SCEV Expander.
   SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
 
+  // Inserting instructions in the loop and using them as PHI's input could
+  // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
+  // corresponding incoming block is not loop exiting). So collect all such
+  // instructions to form LCSSA for them later.
+  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
+
   void OptimizeShadowIV();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2276,9 +2282,9 @@ class LSRInstance {
                 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
   void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                      const Formula &F,
-                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
   void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
-               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
   void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
 
 public:
@@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
 /// Helper for Rewrite. PHI nodes are special because the use of their operands
 /// effectively happens in their predecessor blocks, so the expression may need
 /// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(
-    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
-    SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
+void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
+                                const LSRFixup &LF, const Formula &F,
+                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   DenseMap<BasicBlock *, Value *> Inserted;
 
-  // Inserting instructions in the loop and using them as PHI's input could
-  // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
-  // corresponding incoming block is not loop exiting). So collect all such
-  // instructions to form LCSSA for them later.
-  SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
-
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
       bool needUpdateFixups = false;
@@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(
         // the inserted value.
         if (auto *I = dyn_cast<Instruction>(FullV))
           if (L->contains(I) && !L->contains(BB))
-            InsertedNonLCSSAInsts.push_back(I);
+            InsertedNonLCSSAInsts.insert(I);
 
         PN->setIncomingValue(i, FullV);
         Pair.first->second = FullV;
@@ -5983,8 +5983,6 @@ void LSRInstance::RewriteForPHI(
             }
       }
     }
-
-  formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
 }
 
 /// Emit instructions for the leading candidate expression for this LSRUse (this
@@ -5992,7 +5990,7 @@ void LSRInstance::RewriteForPHI(
 /// expanded value.
 void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F,
-                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
+                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   // First, find an insertion point that dominates UserInst. For PHI nodes,
   // find the nearest block which dominates all the relevant uses.
   if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution(
       Changed = true;
     }
 
+  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
+  formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
+
   for (const IVChain &Chain : IVChainVec) {
     GenerateIVChain(Chain, DeadInsts);
     Changed = true;

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
index bf52c968ad8708..7195d4cab96f47 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
@@ -24,15 +24,15 @@ define ptr @test1() {
 ; CHECK-NEXT:    br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
 ; CHECK:       bbA:
 ; CHECK-NEXT:    switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
-; CHECK-NEXT:    i32 47, label [[BBA_BB89_CRIT_EDGE]]
-; CHECK-NEXT:    i32 58, label [[BBA_BB89_CRIT_EDGE]]
+; CHECK-NEXT:      i32 47, label [[BBA_BB89_CRIT_EDGE]]
+; CHECK-NEXT:      i32 58, label [[BBA_BB89_CRIT_EDGE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bbA.bb89_crit_edge:
 ; CHECK-NEXT:    br label [[BB89:%.*]]
 ; CHECK:       bbB:
 ; CHECK-NEXT:    switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [
-; CHECK-NEXT:    i8 47, label [[BBB_BB89_CRIT_EDGE]]
-; CHECK-NEXT:    i8 58, label [[BBB_BB89_CRIT_EDGE]]
+; CHECK-NEXT:      i8 47, label [[BBB_BB89_CRIT_EDGE]]
+; CHECK-NEXT:      i8 58, label [[BBB_BB89_CRIT_EDGE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bbB.bb89_crit_edge:
 ; CHECK-NEXT:    br label [[BB89]]
@@ -85,23 +85,22 @@ define ptr @test2() {
 ; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
-; CHECK-NEXT:    [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
 ; CHECK-NEXT:    [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
 ; CHECK-NEXT:    br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
 ; CHECK:       bbA:
 ; CHECK-NEXT:    switch i32 0, label [[BB89:%.*]] [
-; CHECK-NEXT:    i32 47, label [[BB89]]
-; CHECK-NEXT:    i32 58, label [[BB89]]
+; CHECK-NEXT:      i32 47, label [[BB89]]
+; CHECK-NEXT:      i32 58, label [[BB89]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bbB:
 ; CHECK-NEXT:    switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [
-; CHECK-NEXT:    i8 47, label [[BBB_EXIT_CRIT_EDGE]]
-; CHECK-NEXT:    i8 58, label [[BBB_EXIT_CRIT_EDGE]]
+; CHECK-NEXT:      i8 47, label [[BBB_EXIT_CRIT_EDGE]]
+; CHECK-NEXT:      i8 58, label [[BBB_EXIT_CRIT_EDGE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bbB.exit_crit_edge:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       bb89:
-; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
+; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
index b4fb4fe7aaf969..737a590394e5ff 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
@@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       loopexit:
-; CHECK-NEXT:    [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY_1:%.*]]
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index fbb9e2a7b6b828..841836c7d2dd86 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq %rbx, %rcx
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:    je .LBB0_4
 ; CHECK-NEXT:  # %bb.2: # %bb4
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    leaq 1(%rcx), %rbx
+; CHECK-NEXT:    incq %rbx
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB0_1
-; CHECK-NEXT:  .LBB0_3: # %bb8
-; CHECK-NEXT:    movq %rcx, %rax
+; CHECK-NEXT:  # %bb.3: # %bb8split
+; CHECK-NEXT:    decq %rbx
+; CHECK-NEXT:  .LBB0_4: # %bb8
+; CHECK-NEXT:    movq %rbx, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
 bb:

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
index 29c03b88c5fb1a..d652e5c5aa0601 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
@@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]]
 ; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
 ; CHECK:       loop.2.ph:
-; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
 ; CHECK-NEXT:    [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ]
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
 ; CHECK-NEXT:    br label [[LOOP_2_HEADER:%.*]]
 ; CHECK:       loop.2.header:
 ; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ]

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
index b13503543d6ee7..ae24da06415cce 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
@@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP_32:%.*]]
 ; CHECK:       loop.exit.loopexitsplitsplitsplit:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
 ; CHECK:       ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
-; CHECK-NEXT:    [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
 ; CHECK:       loop.exit.loopexitsplitsplit:
 ; CHECK-NEXT:    [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
 ; CHECK:       ifmerge.42.loop.exit.loopexitsplit_crit_edge:
-; CHECK-NEXT:    [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXITSPLIT]]
 ; CHECK:       loop.exit.loopexitsplit:
 ; CHECK-NEXT:    [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXIT:%.*]]
 ; CHECK:       then.34.loop.exit.loopexit_crit_edge:
-; CHECK-NEXT:    [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
 ; CHECK-NEXT:    br label [[LOOP_EXIT_LOOPEXIT]]
 ; CHECK:       loop.exit.loopexit:
@@ -48,23 +49,23 @@ define i32 @foo(ptr %A, i32 %t) {
 ; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
 ; CHECK:       loop.32:
-; CHECK-NEXT:    [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
 ; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
 ; CHECK-NEXT:    [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
 ; CHECK-NEXT:    [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
-; CHECK-NEXT:    br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
 ; CHECK:       then.34:
-; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
 ; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
 ; CHECK-NEXT:    [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
 ; CHECK-NEXT:    [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
 ; CHECK-NEXT:    br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
 ; CHECK:       ifmerge.34:
-; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
 ; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
 ; CHECK-NEXT:    [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
@@ -72,7 +73,7 @@ define i32 @foo(ptr %A, i32 %t) {
 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
 ; CHECK:       ifmerge.38:
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
 ; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
 ; CHECK-NEXT:    [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
@@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) {
 ; CHECK-NEXT:    [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
 ; CHECK-NEXT:    br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
 ; CHECK:       ifmerge.42:
-; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
 ; CHECK-NEXT:    [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
@@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) {
 ; CHECK-NEXT:    br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
 ; CHECK:       ifmerge.46:
 ; CHECK-NEXT:    [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
 ; CHECK-NEXT:    [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
 ; CHECK-NEXT:    br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
 ; CHECK:       loop.25:

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
index 0add19e286f583..376831faa99fbd 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll
@@ -89,3 +89,119 @@ loop_exit_7:                                      ; preds = %be_6, %loop_4
   %val_i32_24.lcssa = phi i32 [ %val_i32_24, %be_6 ], [ %val_i32_24, %loop_4 ]
   br label %bb_5
 }
+
+define i64 @test_duplicated_phis(i64 noundef %N) {
+; LEGACYPM-LABEL: define i64 @test_duplicated_phis
+; LEGACYPM-SAME: (i64 noundef [[N:%.*]]) {
+; LEGACYPM-NEXT:  entry:
+; LEGACYPM-NEXT:    [[MUL:%.*]] = shl i64 [[N]], 1
+; LEGACYPM-NEXT:    [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0
+; LEGACYPM-NEXT:    br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; LEGACYPM:       for.body.preheader:
+; LEGACYPM-NEXT:    [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4
+; LEGACYPM-NEXT:    br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; LEGACYPM:       for.body.preheader.new:
+; LEGACYPM-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
+; LEGACYPM-NEXT:    [[TMP1:%.*]] = add i64 [[UNROLL_ITER]], -4
+; LEGACYPM-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
+; LEGACYPM-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
+; LEGACYPM-NEXT:    [[TMP4:%.*]] = sub i64 -3, [[TMP3]]
+; LEGACYPM-NEXT:    br label [[FOR_BODY:%.*]]
+; LEGACYPM:       for.body:
+; LEGACYPM-NEXT:    [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; LEGACYPM-NEXT:    [[INC_3]] = add i64 [[I_07]], 4
+; LEGACYPM-NEXT:    [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
+; LEGACYPM-NEXT:    br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; LEGACYPM:       for.end.loopexit.unr-lcssa.loopexit:
+; LEGACYPM-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 1
+; LEGACYPM-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; LEGACYPM:       for.end.loopexit.unr-lcssa:
+; LEGACYPM-NEXT:    [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT:    [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP4]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; LEGACYPM-NEXT:    [[TMP6:%.*]] = and i64 [[N]], 1
+; LEGACYPM-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP6]], 0
+; LEGACYPM-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
+; LEGACYPM-NEXT:    br label [[FOR_END]]
+; LEGACYPM:       for.end:
+; LEGACYPM-NEXT:    [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; LEGACYPM-NEXT:    ret i64 [[RES_0_LCSSA]]
+;
+; NEWPM-LABEL: define i64 @test_duplicated_phis
+; NEWPM-SAME: (i64 noundef [[N:%.*]]) {
+; NEWPM-NEXT:  entry:
+; NEWPM-NEXT:    [[MUL:%.*]] = shl i64 [[N]], 1
+; NEWPM-NEXT:    [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0
+; NEWPM-NEXT:    br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; NEWPM:       for.body.preheader:
+; NEWPM-NEXT:    [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4
+; NEWPM-NEXT:    br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; NEWPM:       for.body.preheader.new:
+; NEWPM-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
+; NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
+; NEWPM:       for.body:
+; NEWPM-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 3, [[FOR_BODY_PREHEADER_NEW]] ]
+; NEWPM-NEXT:    [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; NEWPM-NEXT:    [[INC_3]] = add i64 [[I_07]], 4
+; NEWPM-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -2
+; NEWPM-NEXT:    [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
+; NEWPM-NEXT:    [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], -3
+; NEWPM-NEXT:    br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; NEWPM:       for.end.loopexit.unr-lcssa.loopexit:
+; NEWPM-NEXT:    [[REASS_SUB_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ]
+; NEWPM-NEXT:    [[RES_1_3_LCSSA:%.*]] = phi i64 [ [[TMP1]], [[FOR_BODY]] ]
+; NEWPM-NEXT:    [[TMP2:%.*]] = add i64 [[REASS_SUB_LCSSA]], -4
+; NEWPM-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; NEWPM:       for.end.loopexit.unr-lcssa:
+; NEWPM-NEXT:    [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[RES_1_3_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; NEWPM-NEXT:    [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; NEWPM-NEXT:    [[TMP3:%.*]] = and i64 [[N]], 1
+; NEWPM-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP3]], 0
+; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
+; NEWPM-NEXT:    br label [[FOR_END]]
+; NEWPM:       for.end:
+; NEWPM-NEXT:    [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; NEWPM-NEXT:    ret i64 [[RES_0_LCSSA]]
+;
+entry:
+  %mul = shl i64 %N, 1
+  %cmp6.not = icmp eq i64 %mul, 0
+  br i1 %cmp6.not, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  %0 = icmp ult i64 %mul, 4
+  br i1 %0, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new
+
+for.body.preheader.new:
+  %unroll_iter = and i64 %mul, -4
+  br label %for.body
+
+for.body:
+  %res.09 = phi i64 [ 0, %for.body.preheader.new ], [ %res.1.3, %for.body ]
+  %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
+  %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.3, %for.body ]
+  %res.1.1 = add i64 %res.09, -1
+  %inc.1 = or disjoint i64 %i.07, 2
+  %res.1.2 = add i64 %inc.1, %res.1.1
+  %reass.sub = sub i64 %res.1.2, %i.07
+  %res.1.3 = add i64 %reass.sub, -3
+  %inc.3 = add nuw i64 %i.07, 4
+  %niter.next.3 = add i64 %niter, 4
+  %niter.ncmp.3.not = icmp eq i64 %niter.next.3, %unroll_iter
+  br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body
+
+for.end.loopexit.unr-lcssa.loopexit:
+  %1 = add i64 %reass.sub, -4
+  br label %for.end.loopexit.unr-lcssa
+
+for.end.loopexit.unr-lcssa:
+  %res.1.lcssa.ph = phi i64 [ undef, %for.body.preheader ], [ %res.1.3, %for.end.loopexit.unr-lcssa.loopexit ]
+  %res.09.unr = phi i64 [ -1, %for.body.preheader ], [ %1, %for.end.loopexit.unr-lcssa.loopexit ]
+  %2 = and i64 %N, 1
+  %lcmp.mod.not = icmp eq i64 %2, 0
+  %spec.select = select i1 %lcmp.mod.not, i64 %res.1.lcssa.ph, i64 %res.09.unr
+  br label %for.end
+
+for.end:
+  %res.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.end.loopexit.unr-lcssa ]
+  ret i64 %res.0.lcssa
+}


        


More information about the llvm-commits mailing list