[llvm] b1b4758 - [LSR] Hoist IVInc to loop header if its all uses are in the loop header

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 10 02:34:07 PST 2023


Author: chenglin.bi
Date: 2023-01-10T18:34:00+08:00
New Revision: b1b4758e7f4b2ffe1faa28b00eb037832e5d26a7

URL: https://github.com/llvm/llvm-project/commit/b1b4758e7f4b2ffe1faa28b00eb037832e5d26a7
DIFF: https://github.com/llvm/llvm-project/commit/b1b4758e7f4b2ffe1faa28b00eb037832e5d26a7.diff

LOG: [LSR] Hoist IVInc to loop header if its all uses are in the loop header

When the latch block is different from the header block, IVInc will be expanded in the latch block. We can't generate the post-index load/store in this case.
But if the IVInc is only used in the loop, we can actually still use the post-index load/store, because when exiting the loop we don't care about the last IVInc value.
So, try to hoist IVInc to help the backend generate more post-index load/store instructions.

Fix #53625

Reviewed By: eopXD

Differential Revision: https://reviews.llvm.org/D138636

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e60382bc933d5..7b8bce6657382 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5650,6 +5650,36 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
     DeadInsts.emplace_back(OperandIsInstr);
 }
 
+// Trying to hoist the IVInc to loop header if all IVInc users are in
+// the loop header. It will help backend to generate post index load/store
+// when the latch block is different from loop header block.
+static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
+                          const LSRUse &LU, Instruction *IVIncInsertPos,
+                          Loop *L) {
+  if (LU.Kind != LSRUse::Address)
+    return false;
+
+  // For now this code do the conservative optimization, only work for
+  // the header block. Later we can hoist the IVInc to the block post
+  // dominate all users.
+  BasicBlock *LHeader = L->getHeader();
+  if (IVIncInsertPos->getParent() == LHeader)
+    return false;
+
+  if (!Fixup.OperandValToReplace ||
+      any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
+        Instruction *UI = cast<Instruction>(U);
+        return UI->getParent() != LHeader;
+      }))
+    return false;
+
+  Instruction *I = Fixup.UserInst;
+  return (isa<LoadInst>(I) &&
+          TTI.isIndexedLoadLegal(TTI.MIM_PostInc, I->getType())) ||
+         (isa<StoreInst>(I) &&
+          TTI.isIndexedStoreLegal(TTI.MIM_PostInc, I->getType()));
+}
+
 /// Rewrite all the fixup locations with new values, following the chosen
 /// solution.
 void LSRInstance::ImplementSolution(
@@ -5658,8 +5688,6 @@ void LSRInstance::ImplementSolution(
   // we can remove them after we are done working.
   SmallVector<WeakTrackingVH, 16> DeadInsts;
 
-  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-
   // Mark phi nodes that terminate chains so the expander tries to reuse them.
   for (const IVChain &Chain : IVChainVec) {
     if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
@@ -5669,6 +5697,11 @@ void LSRInstance::ImplementSolution(
   // Expand the new value definitions and update the users.
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
     for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
+      Instruction *InsertPos =
+          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
+              ? L->getHeader()->getTerminator()
+              : IVIncInsertPos;
+      Rewriter.setIVIncInsertPos(L, InsertPos);
       Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
       Changed = true;
     }

diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
index 515de763b672a..7f7e009597b37 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -13,11 +13,10 @@ define i32 @test(i32 %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:  .LBB0_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr w9, [x1]
+; CHECK-NEXT:    ldr w9, [x1], #4
 ; CHECK-NEXT:    cbnz w9, .LBB0_5
 ; CHECK-NEXT:  // %bb.3: // %for.cond
 ; CHECK-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    add x1, x1, #4
 ; CHECK-NEXT:    subs x8, x8, #1
 ; CHECK-NEXT:    b.ne .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
@@ -35,13 +34,13 @@ for.body.preheader:                               ; preds = %entry
   br label %for.body
 
 for.cond:                                         ; preds = %for.body
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count
   br i1 %exitcond.not, label %return, label %for.body
 
 for.body:                                         ; preds = %for.body.preheader, %for.cond
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ]
-  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
   %val = load i32, ptr %arrayidx, align 4
   %tobool3.not = icmp eq i32 %val, 0
   br i1 %tobool3.not, label %for.cond, label %return
@@ -50,3 +49,71 @@ return:                                           ; preds = %for.cond, %for.body
   %retval.1 = phi i32 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ]
   ret i32 %retval.1
 }
+
+; negative case: %arrayidx.b is not in header
+
+define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) {
+; CHECK-LABEL: IVIncHoist_not_all_user_in_header:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #1
+; CHECK-NEXT:    b.lt .LBB1_5
+; CHECK-NEXT:  // %bb.1: // %for.body.preheader
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    mov w9, w0
+; CHECK-NEXT:    add x10, x1, #4
+; CHECK-NEXT:    add x11, x2, #8
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:  .LBB1_2: // %for.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr w12, [x10, x8, lsl #2]
+; CHECK-NEXT:    cbnz w12, .LBB1_7
+; CHECK-NEXT:  // %bb.3: // %if.then
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    ldr w12, [x11, x8, lsl #2]
+; CHECK-NEXT:    cbnz w12, .LBB1_6
+; CHECK-NEXT:  // %bb.4: // %for.cond
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    add x8, x8, #1
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.ne .LBB1_2
+; CHECK-NEXT:  .LBB1_5:
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_6: // %if.then.return.loopexit_crit_edge
+; CHECK-NEXT:    add x0, x8, #3
+; CHECK-NEXT:  .LBB1_7: // %return
+; CHECK-NEXT:    ret
+entry:
+  %cmp13 = icmp sgt i32 %c, 0
+  br i1 %cmp13, label %for.body.preheader, label %return
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %c to i64
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %return, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ]
+  %iv.a = phi i64 [ 1, %for.body.preheader ], [ %iv.next.a, %for.cond ]
+  %iv.b = phi i64 [ 2, %for.body.preheader ], [ %iv.next.b, %for.cond ]
+  %arrayidx.a = getelementptr inbounds i32, ptr %a, i64 %iv.a
+  %iv.next.a = add nuw nsw i64 %iv.a, 1
+  %val.a = load i32, ptr %arrayidx.a, align 4
+  %tobool3.not = icmp eq i32 %val.a, 0
+  br i1 %tobool3.not, label %if.then, label %return
+
+if.then:
+  %arrayidx.b = getelementptr inbounds i32, ptr %b, i64 %iv.b
+  %iv.next.b = add nuw nsw i64 %iv.b, 1
+  %val.b = load i32, ptr %arrayidx.b, align 4
+  %tobool4.not = icmp eq i32 %val.b, 0
+  br i1 %tobool4.not, label %for.cond, label %return
+
+return:                                           ; preds = %for.cond, %for.body, %entry
+  %retval.1 = phi i64 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ], [ %iv.next.b, %if.then ]
+  ret i64 %retval.1
+}


        


More information about the llvm-commits mailing list