[llvm] b33f5e7 - [LSR] Use evaluateAtIteration in lsr-term-fold

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 21 08:13:11 PDT 2023


Author: Philip Reames
Date: 2023-03-21T08:11:36-07:00
New Revision: b33f5e7ed3cd04797b721e32b982b2cf6e06e192

URL: https://github.com/llvm/llvm-project/commit/b33f5e7ed3cd04797b721e32b982b2cf6e06e192
DIFF: https://github.com/llvm/llvm-project/commit/b33f5e7ed3cd04797b721e32b982b2cf6e06e192.diff

LOG: [LSR] Use evaluateAtIteration in lsr-term-fold

This is a follow up to one of the side discussions on D146429.  There are two semantic changes contained here.

The motivation for the change to the legality condition introduced in D146429 comes from the fact that we only check the post-inc form. As such, as long as the values of the post-inc variable don't self wrap, it's actually okay if we wrap past the starting value of the pre-inc IV.

Second, Nikic noticed during review that the test changes changed behavior for TC=0 (i.e. N=0 in the tests).  On more careful inspection, it became apparent that the previous manual expansion code was incorrect in the case where the primary IV could wrap without poison, and started with the limit value (i.e. i8 post-inc starts at 255 for 0 exit test, implying pre-inc starts with 0).  See @wrap_around test for an example of the (previous) miscompile.

Differential Revision: https://reviews.llvm.org/D146457

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0a4d815e67206..7c6cea0dd1497 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6763,33 +6763,26 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     }
 
     // Check that we can compute the value of AddRec on the exiting iteration
-    // without soundness problems.  There are two cases to be worried about:
-    // 1) BECount could be 255 with type i8.  Simply adding one would be
-    //    incorrect.  We may need one extra bit to represent the unsigned
-    //    trip count.
-    // 2) The multiplication of stride by TC may wrap around.  This is subtle
-    //    because computing the result accounting for wrap is insufficient.
-    //    In order to use the result in an exit test, we must also know that
-    //    AddRec doesn't take the same value on any previous iteration.
-    //    The simplest case to consider is a candidate IV which is narrower
-    //    than the trip count (and thus original IV), but this can also
-    //    happen due to non-unit strides on the candidate IVs.
+    // without soundness problems.  evaluateAtIteration internally needs
+    // to multiply the stride of the iteration number - which may wrap around.
+    // The issue here is subtle because computing the result accounting for
+    // wrap is insufficient. In order to use the result in an exit test, we
+    // must also know that AddRec doesn't take the same value on any previous
+    // iteration. The simplest case to consider is a candidate IV which is
+    // narrower than the trip count (and thus original IV), but this can
+    // also happen due to non-unit strides on the candidate IVs.
+    // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(),
+    // but in practice we appear to be missing inference for cases we should
+    // be able to catch.
     ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
     ConstantRange BECountCR = SE.getUnsignedRange(BECount);
-    unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits();
+    unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits();
     unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
     if (NoOverflowBitWidth > ARBitWidth)
       continue;
 
-    const SCEV *TermValueSLocal = SE.getAddExpr(
-        AddRec->getOperand(0),
-        SE.getTruncateOrZeroExtend(
-            SE.getMulExpr(
-                AddRec->getOperand(1),
-                SE.getTruncateOrZeroExtend(
-                    SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
-                    AddRec->getOperand(1)->getType())),
-            AddRec->getOperand(0)->getType()));
+    const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
+    const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
     if (!Expander.isSafeToExpand(TermValueSLocal)) {
       LLVM_DEBUG(
           dbgs() << "Is not safe to expand terminating value for phi node" << PN

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
index 83c4f64b041b9..b7f55b2172ea6 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -39,10 +39,11 @@ define void @runtime_tripcount(ptr %a, i32 %N) {
 ; CHECK-LABEL: @runtime_tripcount(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[N:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 88
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
@@ -72,13 +73,14 @@ for.end:                                          ; preds = %for.body
 
 ; In this case, the i8 IVs increment *isn't* nsw.  As a result, a N of 0
 ; is well defined, and thus the post-inc starts at 255.
-; FIXME: miscompile
 define void @wrap_around(ptr %a, i8 %N) {
 ; CHECK-LABEL: @wrap_around(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[N:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
@@ -112,14 +114,16 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) {
 ; CHECK-LABEL: @ptr_of_ptr_addrec(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP3]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8
-; CHECK-NEXT:    tail call void @foo(ptr [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8
+; CHECK-NEXT:    tail call void @foo(ptr [[TMP4]])
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1
 ; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
 ; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -152,9 +156,11 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
 ; CHECK-NEXT:    [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP3]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
@@ -162,8 +168,8 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = call ptr @foo(ptr [[TMP4]])
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
 ; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
 ; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]


        


More information about the llvm-commits mailing list