[llvm] 7ad786a - [LSR] Generalize one aspect of terminator folding (recently introduced in D132443)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 20 12:19:50 PST 2023


Author: Philip Reames
Date: 2023-01-20T12:19:43-08:00
New Revision: 7ad786a29e7bcd75bf3e7af2d96a0bf419faff64

URL: https://github.com/llvm/llvm-project/commit/7ad786a29e7bcd75bf3e7af2d96a0bf419faff64
DIFF: https://github.com/llvm/llvm-project/commit/7ad786a29e7bcd75bf3e7af2d96a0bf419faff64.diff

LOG: [LSR] Generalize one aspect of terminator folding (recently introduced in D132443)

There's no need to require the start value to come directly from the loop predecessor.  This was sometimes covering up a latent miscompile in this off-by-default option, but the miscompile needs fixed anyways and the issue has been raised on the original review.

Differential Revision: https://reviews.llvm.org/D142240

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
    llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7325694eec07a..f04420bbf44a5 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6625,7 +6625,6 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     return std::nullopt;
   }
 
-  BasicBlock *LoopPreheader = L->getLoopPreheader();
   BasicBlock *LoopLatch = L->getLoopLatch();
 
   // TODO: Can we do something for greater than and less than?
@@ -6712,12 +6711,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                  << "\n");
       return {false, nullptr};
     }
-    // TODO: Right now we limit the phi node to help the folding be of a start
-    // value of getelementptr. We can extend to any kinds of IV as long as it is
-    // an affine AddRec. Add a switch to cover more types of instructions here
-    // and down in the actual transformation.
-    return {isa<GetElementPtrInst>(PN.getIncomingValueForBlock(LoopPreheader)),
-            TermValueS};
+    return {true, TermValueS};
   };
 
   PHINode *ToFold = nullptr;
@@ -6850,15 +6844,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       SCEVExpanderCleaner ExpCleaner(Expander);
 
       // Create new terminating value at loop header
-      GetElementPtrInst *StartValueGEP = cast<GetElementPtrInst>(StartValue);
-      Type *PtrTy = StartValueGEP->getPointerOperand()->getType();
-
       const SCEV *TermValueS = CanFoldTerminatingCondition->second.second;
       assert(
           Expander.isSafeToExpand(TermValueS) &&
           "Terminating value was checked safe in canFoldTerminatingCondition");
 
-      Value *TermValue = Expander.expandCodeFor(TermValueS, PtrTy,
+      Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
                                                 LoopPreheader->getTerminator());
 
       LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
index 21bb8e5bd1cef..2856da967ffa2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -51,29 +51,6 @@ for.body4:                                        ; preds = %for.body, %for.body
   br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
 }
 
-; The terminating condition folding transformation cannot find the ptr IV
-; because it checks if the value comes from the LoopPreheader. %mark is from
-; the function argument, so it is not qualified for the transformation.
-define void @no_iv_to_help(ptr %mark, i32 signext %length) {
-; CHECK: Cannot find other AddRec IV to help folding
-entry:
-  %tobool.not3 = icmp eq i32 %length, 0
-  br i1 %tobool.not3, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.body, %entry
-  ret void
-
-for.body:                                         ; preds = %entry, %for.body
-  %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
-  %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
-  %0 = load ptr, ptr %dst.04, align 8
-  call ptr @foo(ptr %0)
-  %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
-  %dec = add nsw i32 %i.05, -1
-  %tobool.not = icmp eq i32 %dec, 0
-  br i1 %tobool.not, label %for.cond.cleanup, label %for.body
-}
-
 declare void @foo(ptr)
 
 define void @NonAddRecIV(ptr %a) {

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
index 30fbed966dfa3..23b06ba8de249 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -133,3 +133,42 @@ for.end:                                 ; preds = %for.body
 }
 
 declare void @foo(ptr)
+
+define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
+; CHECK-LABEL: @iv_start_non_preheader(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[LENGTH]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DST_04]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @foo(ptr [[TMP1]])
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %tobool.not3 = icmp eq i32 %length, 0
+  br i1 %tobool.not3, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
+  %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
+  %0 = load ptr, ptr %dst.04, align 8
+  call ptr @foo(ptr %0)
+  %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
+  %dec = add nsw i32 %i.05, -1
+  %tobool.not = icmp eq i32 %dec, 0
+  br i1 %tobool.not, label %for.cond.cleanup, label %for.body
+}


        


More information about the llvm-commits mailing list