[llvm] 2124505 - [IRCE] Relax restrictions on IRCE's latch exit count

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 13 02:00:27 PDT 2023


Author: Max Kazantsev
Date: 2023-04-13T16:00:19+07:00
New Revision: 2124505fe4c9ff85173c5ca3b14810f698436239

URL: https://github.com/llvm/llvm-project/commit/2124505fe4c9ff85173c5ca3b14810f698436239
DIFF: https://github.com/llvm/llvm-project/commit/2124505fe4c9ff85173c5ca3b14810f698436239.diff

LOG: [IRCE] Relax restrictions on IRCE's latch exit count

It seems that existing logic is too strict about latch block exit count.
It is required to be computable, however it is not used in any computations,
and effectively the only thing it is used for is to get the type of computed
exit count.

Sometimes the exit count for latch block is not known, but the loop is still
finite because of other exits, and safe bounds are still computable. In this case,
we miss an opportunity to apply IRCE.
We could instead use a more relaxed version - max symbolic exit count, which,
if exists, is enough to say that the loop is finite, and its type should be good enough.

There is a subtlety with type: we do not support latch count type wider than range
check type. Because of that, we want to have the narrowest type available. So if it
can be computed from latch block immediately, take it. Otherwise, take whatever whole
loop provides and hope that it's type isn't too wide.

Differential Revision: https://reviews.llvm.org/D147910
Reviewed By: danilaml

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
    llvm/test/Transforms/IRCE/stride_more_than_1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 7be124b2b98a0..4dde7919834ba 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -739,6 +739,19 @@ static bool isSafeIncreasingBound(const SCEV *Start,
           SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
 }
 
+/// Returns estimate for max latch taken count of the loop of the narrowest
+/// available type. If the latch block has such estimate, it is returned.
+/// Otherwise, we use max exit count of whole loop (that is potentially of wider
+/// type than latch check itself), which is still better than no estimate.
+static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE,
+                                                          const Loop &L) {
+  const SCEV *FromBlock =
+      SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum);
+  if (isa<SCEVCouldNotCompute>(FromBlock))
+    return SE.getSymbolicMaxBackedgeTakenCount(&L);
+  return FromBlock;
+}
+
 std::optional<LoopStructure>
 LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
                                   const char *&FailureReason) {
@@ -781,12 +794,12 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
     return std::nullopt;
   }
 
-  const SCEV *LatchCount = SE.getExitCount(&L, Latch);
-  if (isa<SCEVCouldNotCompute>(LatchCount)) {
+  const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L);
+  if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) {
     FailureReason = "could not compute latch count";
     return std::nullopt;
   }
-  assert(SE.getLoopDisposition(LatchCount, &L) ==
+  assert(SE.getLoopDisposition(MaxBETakenCount, &L) ==
              ScalarEvolution::LoopInvariant &&
          "loop variant exit count doesn't make sense!");
 
@@ -1392,12 +1405,12 @@ Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
 
 bool LoopConstrainer::run() {
   BasicBlock *Preheader = nullptr;
-  const SCEV *LatchTakenCount =
-      SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
+  const SCEV *MaxBETakenCount =
+      getNarrowestLatchMaxTakenCountEstimate(SE, OriginalLoop);
   Preheader = OriginalLoop.getLoopPreheader();
-  assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
+  assert(!isa<SCEVCouldNotCompute>(MaxBETakenCount) && Preheader != nullptr &&
          "preconditions!");
-  ExitCountTy = cast<IntegerType>(LatchTakenCount->getType());
+  ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType());
 
   OriginalPreheader = Preheader;
   MainLoopPreheader = Preheader;

diff  --git a/llvm/test/Transforms/IRCE/stride_more_than_1.ll b/llvm/test/Transforms/IRCE/stride_more_than_1.ll
index f349269e9d683..678cbe6f7cebe 100644
--- a/llvm/test/Transforms/IRCE/stride_more_than_1.ll
+++ b/llvm/test/Transforms/IRCE/stride_more_than_1.ll
@@ -598,7 +598,7 @@ exit:
   ret void
 }
 
-; TODO: IRCE is legal here.
+; IRCE is legal here.
 ; Here how it is done if the step was 1: https://godbolt.org/z/jEqWaseWc
 ; It is also legal for step 4. Proof:
 ; - Capacity check ensures that iv < limit <= SINT_MAX - 3, meaning that
@@ -614,23 +614,66 @@ define i32 @test_09(ptr %p, ptr %capacity_p, ptr %num_elements_p) {
 ; CHECK-NEXT:    [[CAPACITY:%.*]] = load i32, ptr [[CAPACITY_P]], align 4, !range [[RNG16:![0-9]+]]
 ; CHECK-NEXT:    [[NUM_ELEMENTS:%.*]] = load i32, ptr [[NUM_ELEMENTS_P]], align 4, !range [[RNG16]]
 ; CHECK-NEXT:    [[LIMIT:%.*]] = sub i32 [[CAPACITY]], 3
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[CAPACITY]], -3
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[CAPACITY]], 2147483646
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP0]], [[SMAX]]
+; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LIMIT]], i32 0)
+; CHECK-NEXT:    [[SMAX2:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1)
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[SMAX2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[SMIN3:%.*]] = call i32 @llvm.smin.i32(i32 [[NUM_ELEMENTS]], i32 [[TMP4]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN3]], i32 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[CAPACITY_CHECK:%.*]] = icmp slt i32 [[IV]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CAPACITY_CHECK]], label [[BACKEDGE]], label [[OUT_OF_BOUNDS:%.*]], !prof [[PROF17:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof [[PROF17:![0-9]+]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_WIDE:%.*]] = zext i32 [[IV]] to i64
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE]]
 ; CHECK-NEXT:    store i32 1, ptr [[EL_PTR]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[NUM_ELEMENTS]]
-; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[IV_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[IV_NEXT_LCSSA]], [[NUM_ELEMENTS]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IV_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[IV_LCSSA1_PH:%.*]] = phi i32 [ [[IV_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
+; CHECK-NEXT:    [[IV_LCSSA1:%.*]] = phi i32 [ [[IV_LCSSA]], [[MAIN_EXIT_SELECTOR]] ], [ [[IV_LCSSA1_PH]], [[EXIT_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[IV_LCSSA1]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit5:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out_of_bounds:
 ; CHECK-NEXT:    ret i32 -1
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IV_POSTLOOP]] = phi i32 [ [[IV_COPY]], [[POSTLOOP]] ], [ [[IV_NEXT_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP]] ]
+; CHECK-NEXT:    [[CAPACITY_CHECK_POSTLOOP:%.*]] = icmp slt i32 [[IV_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CAPACITY_CHECK_POSTLOOP]], label [[BACKEDGE_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF17]]
+; CHECK:       backedge.postloop:
+; CHECK-NEXT:    [[IV_WIDE_POSTLOOP:%.*]] = zext i32 [[IV_POSTLOOP]] to i64
+; CHECK-NEXT:    [[EL_PTR_POSTLOOP:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE_POSTLOOP]]
+; CHECK-NEXT:    store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4
+; CHECK-NEXT:    [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4
+; CHECK-NEXT:    [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP18:![0-9]+]], !irce.loop.clone !6
 ;
 entry:
   %capacity = load i32, ptr %capacity_p, !range !4


        


More information about the llvm-commits mailing list