[llvm] [LoopConstrainer] Apply loop gurads to check that loop bounds are safe (PR #71531)

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 7 04:29:52 PST 2023


https://github.com/aleks-tmb created https://github.com/llvm/llvm-project/pull/71531

Loop guards that apply to loop SCEV bounds allow IRCE for cases with compound loop bounds such as:

if (K > 0 && M > 0)
  for (i = 0; i < min(K, M); i++) {...}

if (K > 0 && M > 0)
  for (i = min(K, M); i >= 0; i--) {...}

Otherwise SCEV couldn't prove that loops have safe bounds in these cases.

>From b1ef93b00fb71873275566d783bb08f8c26dd555 Mon Sep 17 00:00:00 2001
From: Aleksander Popov <apopov at azul.com>
Date: Tue, 7 Nov 2023 05:31:13 +0100
Subject: [PATCH] [LoopConstrainer] Apply loop gurads to check that loop bounds
 are safe

Loop guards that apply to loop SCEV bounds allow IRCE for cases with
compound loop bounds such as:

if (K > 0 && M > 0)
  for (i = 0; i < min(K, M); i++) {...}

if (K > 0 && M > 0)
  for (i = min(K, M); i >= 0; i--) {...}

Otherwise SCEV couldn't prove that loops have safe bounds in these
cases.
---
 llvm/lib/Transforms/Utils/LoopConstrainer.cpp |  6 ++
 .../Transforms/IRCE/compound-loop-bound.ll    | 85 +++++++++++++++--
 .../Transforms/IRCE/variable-loop-bounds.ll   | 95 +++++++++++++++++--
 3 files changed, 168 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
index ea6d952cfa7d4f3..a2fb39ec64037f3 100644
--- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
+++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
@@ -27,6 +27,9 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
   if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
     return false;
 
+  Start = SE.applyLoopGuards(Start, L);
+  BoundSCEV = SE.applyLoopGuards(BoundSCEV, L);
+
   assert(SE.isKnownNegative(Step) && "expecting negative step");
 
   LLVM_DEBUG(dbgs() << "isSafeDecreasingBound with:\n");
@@ -73,6 +76,9 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
   if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
     return false;
 
+  Start = SE.applyLoopGuards(Start, L);
+  BoundSCEV = SE.applyLoopGuards(BoundSCEV, L);
+
   LLVM_DEBUG(dbgs() << "isSafeIncreasingBound with:\n");
   LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n");
   LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n");
diff --git a/llvm/test/Transforms/IRCE/compound-loop-bound.ll b/llvm/test/Transforms/IRCE/compound-loop-bound.ll
index 0930d19e22154fc..e50d8c6127f4011 100644
--- a/llvm/test/Transforms/IRCE/compound-loop-bound.ll
+++ b/llvm/test/Transforms/IRCE/compound-loop-bound.ll
@@ -16,23 +16,56 @@ define void @incrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) {
 ; CHECK-NEXT:    br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT:    [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 [[M]])
+; CHECK-NEXT:    [[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[K]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN2]], i32 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ]
-; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add nsw i32 [[IDX]], 1
 ; CHECK-NEXT:    [[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]]
-; CHECK-NEXT:    br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]]
 ; CHECK:       in.bounds:
 ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]]
 ; CHECK-NEXT:    store i32 0, ptr [[ADDR]], align 4
 ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[SMIN]]
-; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[SMIN]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit3:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out.of.bounds:
 ; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[GUARD_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN]]
+; CHECK-NEXT:    br i1 [[GUARD_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]]
+; CHECK:       in.bounds.postloop:
+; CHECK-NEXT:    [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT:    store i32 0, ptr [[ADDR_POSTLOOP]], align 4
+; CHECK-NEXT:    [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[SMIN]]
+; CHECK-NEXT:    br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]], !loop_constrainer.loop.clone !6
 ;
 entry:
   %len = load i32, ptr %len_ptr, !range !0
@@ -78,24 +111,58 @@ define void @decrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CHECK0]], [[CHECK1]]
 ; CHECK-NEXT:    br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       preheader:
-; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT:    [[INDVAR_START:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDVAR_START]], 1
+; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 [[TMP0]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 0)
+; CHECK-NEXT:    [[EXIT_PRELOOP_AT:%.*]] = add nsw i32 [[SMAX]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[INDVAR_START]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preloop.preheader:
+; CHECK-NEXT:    br label [[LOOP_PRELOOP:%.*]]
+; CHECK:       mainloop:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[SMIN]], [[PREHEADER]] ], [ [[IDX_DEC:%.*]], [[IN_BOUNDS:%.*]] ]
-; CHECK-NEXT:    [[IDX_DEC]] = sub i32 [[IDX]], 1
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY:%.*]], [[MAINLOOP:%.*]] ], [ [[IDX_DEC:%.*]], [[IN_BOUNDS:%.*]] ]
+; CHECK-NEXT:    [[IDX_DEC]] = sub nsw i32 [[IDX]], 1
 ; CHECK-NEXT:    [[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]]
-; CHECK-NEXT:    br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]]
 ; CHECK:       in.bounds:
 ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]]
 ; CHECK-NEXT:    store i32 0, ptr [[ADDR]], align 4
 ; CHECK-NEXT:    [[NEXT:%.*]] = icmp sgt i32 [[IDX_DEC]], -1
-; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit1:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out.of.bounds:
 ; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
+; CHECK:       loop.preloop:
+; CHECK-NEXT:    [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_DEC_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ [[INDVAR_START]], [[LOOP_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_DEC_PRELOOP]] = sub i32 [[IDX_PRELOOP]], 1
+; CHECK-NEXT:    [[GUARD_PRELOOP:%.*]] = icmp slt i32 [[IDX_PRELOOP]], [[LEN]]
+; CHECK-NEXT:    br i1 [[GUARD_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]]
+; CHECK:       in.bounds.preloop:
+; CHECK-NEXT:    [[ADDR_PRELOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_PRELOOP]]
+; CHECK-NEXT:    store i32 0, ptr [[ADDR_PRELOOP]], align 4
+; CHECK-NEXT:    [[NEXT_PRELOOP:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP7:![0-9]+]], !loop_constrainer.loop.clone !6
+; CHECK:       preloop.exit.selector:
+; CHECK-NEXT:    [[IDX_DEC_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_DEC_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP_LCSSA]], -1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[IDX_PRELOOP_COPY]] = phi i32 [ [[INDVAR_START]], [[PREHEADER]] ], [ [[IDX_DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ [[INDVAR_START]], [[PREHEADER]] ], [ [[IDX_DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[MAINLOOP]]
 ;
   entry:
   %len = load i32, ptr %len_ptr, !range !0
diff --git a/llvm/test/Transforms/IRCE/variable-loop-bounds.ll b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
index 43d450b938afea4..d40b9818fb21099 100644
--- a/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
+++ b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
@@ -950,31 +950,108 @@ define void @signed_var_imm_dec_eq(ptr nocapture %a, ptr nocapture readonly %b,
 ; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[M]], 1024
 ; CHECK-NEXT:    br i1 [[CMP14]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[M]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 1024)
+; CHECK-NEXT:    [[EXIT_PRELOOP_AT:%.*]] = add nsw i32 [[SMAX]], -1
+; CHECK-NEXT:    [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 0)
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = add nsw i32 [[SMAX1]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 1024, [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_BODY_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       for.body.preloop.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_PRELOOP:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
+; CHECK:       mainloop:
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY_PREHEADER3:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       for.body.preheader3:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_INC:%.*]] ], [ [[IV_PRELOOP_COPY:%.*]], [[FOR_BODY_PREHEADER3]] ]
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[IV]], 1024
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_INC]], label [[IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 true, label [[FOR_INC]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[MUL]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[MUL]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ [[ADD]], [[IF_ELSE]] ], [ [[MUL]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE]], ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[IV]], -1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[DEC]], [[M]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[DEC]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TMP6]], true
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MAIN_EXIT_SELECTOR:%.*]], label [[FOR_BODY]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[DEC_LCSSA:%.*]] = phi i32 [ [[DEC]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[DEC_LCSSA]], [[M]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IV_COPY:%.*]] = phi i32 [ [[IV_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[DEC_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END2:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[DEC_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       for.body.preloop:
+; CHECK-NEXT:    [[IV_PRELOOP:%.*]] = phi i32 [ [[DEC_PRELOOP:%.*]], [[FOR_INC_PRELOOP:%.*]] ], [ 1024, [[FOR_BODY_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[CMP1_PRELOOP:%.*]] = icmp slt i32 [[IV_PRELOOP]], 1024
+; CHECK-NEXT:    [[ARRAYIDX_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PRELOOP]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX2_PRELOOP]], align 4
+; CHECK-NEXT:    [[MUL_PRELOOP:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-NEXT:    [[ARRAYIDX3_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT:    br i1 [[CMP1_PRELOOP]], label [[FOR_INC_PRELOOP]], label [[IF_ELSE_PRELOOP:%.*]]
+; CHECK:       if.else.preloop:
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3_PRELOOP]], align 4
+; CHECK-NEXT:    [[ADD_PRELOOP:%.*]] = add nsw i32 [[TMP11]], [[MUL_PRELOOP]]
+; CHECK-NEXT:    br label [[FOR_INC_PRELOOP]]
+; CHECK:       for.inc.preloop:
+; CHECK-NEXT:    [[STOREMERGE_PRELOOP:%.*]] = phi i32 [ [[ADD_PRELOOP]], [[IF_ELSE_PRELOOP]] ], [ [[MUL_PRELOOP]], [[FOR_BODY_PRELOOP]] ]
+; CHECK-NEXT:    store i32 [[STOREMERGE_PRELOOP]], ptr [[ARRAYIDX3_PRELOOP]], align 4
+; CHECK-NEXT:    [[DEC_PRELOOP]] = add nsw i32 [[IV_PRELOOP]], -1
+; CHECK-NEXT:    [[CMP_PRELOOP:%.*]] = icmp eq i32 [[DEC_PRELOOP]], [[M]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[DEC_PRELOOP]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT:    [[TMP13:%.*]] = xor i1 [[TMP12]], true
+; CHECK-NEXT:    br i1 [[TMP13]], label [[PRELOOP_EXIT_SELECTOR:%.*]], label [[FOR_BODY_PRELOOP]], !llvm.loop [[LOOP15:![0-9]+]], !loop_constrainer.loop.clone !5
+; CHECK:       preloop.exit.selector:
+; CHECK-NEXT:    [[DEC_PRELOOP_LCSSA:%.*]] = phi i32 [ [[DEC_PRELOOP]], [[FOR_INC_PRELOOP]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[DEC_PRELOOP_LCSSA]], [[M]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[PRELOOP_PSEUDO_EXIT]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[IV_PRELOOP_COPY]] = phi i32 [ 1024, [[FOR_BODY_PREHEADER]] ], [ [[DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END]] = phi i32 [ 1024, [[FOR_BODY_PREHEADER]] ], [ [[DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[MAINLOOP]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[FOR_BODY_POSTLOOP:%.*]]
+; CHECK:       for.body.postloop:
+; CHECK-NEXT:    [[IV_POSTLOOP:%.*]] = phi i32 [ [[DEC_POSTLOOP:%.*]], [[FOR_INC_POSTLOOP:%.*]] ], [ [[IV_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[CMP1_POSTLOOP:%.*]] = icmp slt i32 [[IV_POSTLOOP]], 1024
+; CHECK-NEXT:    [[ARRAYIDX_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_POSTLOOP]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2_POSTLOOP]], align 4
+; CHECK-NEXT:    [[MUL_POSTLOOP:%.*]] = mul nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-NEXT:    [[ARRAYIDX3_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT:    br i1 [[CMP1_POSTLOOP]], label [[FOR_INC_POSTLOOP]], label [[IF_ELSE_POSTLOOP:%.*]]
+; CHECK:       if.else.postloop:
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3_POSTLOOP]], align 4
+; CHECK-NEXT:    [[ADD_POSTLOOP:%.*]] = add nsw i32 [[TMP17]], [[MUL_POSTLOOP]]
+; CHECK-NEXT:    br label [[FOR_INC_POSTLOOP]]
+; CHECK:       for.inc.postloop:
+; CHECK-NEXT:    [[STOREMERGE_POSTLOOP:%.*]] = phi i32 [ [[ADD_POSTLOOP]], [[IF_ELSE_POSTLOOP]] ], [ [[MUL_POSTLOOP]], [[FOR_BODY_POSTLOOP]] ]
+; CHECK-NEXT:    store i32 [[STOREMERGE_POSTLOOP]], ptr [[ARRAYIDX3_POSTLOOP]], align 4
+; CHECK-NEXT:    [[DEC_POSTLOOP]] = add nsw i32 [[IV_POSTLOOP]], -1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp eq i32 [[DEC_POSTLOOP]], [[M]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY_POSTLOOP]], !llvm.loop [[LOOP16:![0-9]+]], !loop_constrainer.loop.clone !5
 ;
 entry:
   %cmp14 = icmp slt i32 %M, 1024



More information about the llvm-commits mailing list