[llvm] [SimplifyCFG] Not folding branch in loop header with constant iterations (PR #74268)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 00:56:04 PST 2023


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/74268

>From 771edc8e103a2ab6aae70847245d7ee5e9ff5e69 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Tue, 5 Dec 2023 16:44:44 +0800
Subject: [PATCH 1/2] [SimplifyCFG] Pre-commit test for folding branches in
 simplify cfg

---
 .../SimplifyCFG/simplify-cfg-unroll.ll        | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
new file mode 100644
index 0000000000000..f442da7782138
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s --check-prefixes=CHECK-CFG
+; RUN: opt < %s -S -passes=simplifycfg,loop-unroll | FileCheck %s --check-prefixes=CHECK-UNROLL
+
+define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr noundef %Out) {
+; CHECK-CFG-LABEL: define void @func(
+; CHECK-CFG-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-CFG-NEXT:  entry:
+; CHECK-CFG-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK-CFG:       for.cond:
+; CHECK-CFG-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
+; CHECK-CFG-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
+; CHECK-CFG-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
+; CHECK-CFG-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-CFG-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-CFG:       if.end:
+; CHECK-CFG-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
+; CHECK-CFG-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
+; CHECK-CFG-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-CFG-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND4:%.*]]
+; CHECK-CFG:       for.cond4:
+; CHECK-CFG-NEXT:    [[ARG_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY7:%.*]] ]
+; CHECK-CFG-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[ARG_0]], 4
+; CHECK-CFG-NEXT:    br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6]]
+; CHECK-CFG:       for.cond.cleanup6:
+; CHECK-CFG-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-CFG:       for.body7:
+; CHECK-CFG-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT:    [[IDXPROM10:%.*]] = sext i32 [[ARG_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
+; CHECK-CFG-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-CFG-NEXT:    store i32 [[ADD14]], ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT:    call void @_Z3barv()
+; CHECK-CFG-NEXT:    [[INC]] = add nsw i32 [[ARG_0]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-CFG:       cleanup:
+; CHECK-CFG-NEXT:    ret void
+;
+; CHECK-UNROLL-LABEL: define void @func(
+; CHECK-UNROLL-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-UNROLL-NEXT:  entry:
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK-UNROLL:       for.cond:
+; CHECK-UNROLL-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
+; CHECK-UNROLL-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
+; CHECK-UNROLL-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
+; CHECK-UNROLL-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-UNROLL-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL:       if.end:
+; CHECK-UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
+; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
+; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4:%.*]]
+; CHECK-UNROLL:       for.cond4:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6:
+; CHECK-UNROLL-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UNROLL:       for.body7:
+; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1:%.*]]
+; CHECK-UNROLL:       for.body7.1:
+; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2:%.*]]
+; CHECK-UNROLL:       for.body7.2:
+; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3:%.*]]
+; CHECK-UNROLL:       for.body7.3:
+; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6]]
+; CHECK-UNROLL:       for.body7.4:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARRAYIDX]], [[FOR_BODY7_3]] ]
+; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    unreachable
+; CHECK-UNROLL:       cleanup:
+; CHECK-UNROLL-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.cleanup6, %entry
+  %Dim.0 = phi i32 [ 0, %entry ], [ %inc16, %for.cond.cleanup6 ]
+  %Idx.addr.0 = phi i32 [ %Idx, %entry ], [ %add, %for.cond.cleanup6 ]
+  %cmp = icmp slt i32 %Dim.0, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %cleanup
+
+for.body:                                         ; preds = %for.cond
+  %cmp1 = icmp eq i32 %Dim.0, %Dims
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  br label %cleanup
+
+if.end:                                           ; preds = %for.body
+  %idxprom = sext i32 %Dim.0 to i64
+  %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %idxprom
+  %0 = load ptr, ptr %arrayidx, align 8
+  %idxprom2 = sext i32 %Idx.addr.0 to i64
+  %arrayidx3 = getelementptr inbounds i32, ptr %0, i64 %idxprom2
+  %1 = load i32, ptr %arrayidx3, align 4
+  %add = add nsw i32 %1, 1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %for.body7, %if.end
+  %arg.0 = phi i32 [ 0, %if.end ], [ %inc, %for.body7 ]
+  %cmp5 = icmp slt i32 %arg.0, 4
+  br i1 %cmp5, label %for.body7, label %for.cond.cleanup6
+
+for.cond.cleanup6:                                ; preds = %for.cond4
+  %inc16 = add nsw i32 %Dim.0, 1
+  br label %for.cond, !llvm.loop !0
+
+for.body7:                                        ; preds = %for.cond4
+  %2 = load ptr, ptr %arrayidx, align 8
+  %idxprom10 = sext i32 %arg.0 to i64
+  %arrayidx11 = getelementptr inbounds i32, ptr %2, i64 %idxprom10
+  %3 = load i32, ptr %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %Out, i64 %idxprom10
+  %4 = load i32, ptr %arrayidx13, align 4
+  %add14 = add nsw i32 %4, %3
+  store i32 %add14, ptr %arrayidx13, align 4
+  call void @_Z3barv()
+  %inc = add nsw i32 %arg.0, 1
+  br label %for.cond4, !llvm.loop !3
+
+cleanup:                                          ; preds = %if.then, %for.cond.cleanup
+  ret void
+}
+
+declare void @_Z3barv()
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.enable"}
+!3 = distinct !{!3, !1}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.enable"}
+;.
+; CHECK-CFG: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-CFG: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK-CFG: [[META2]] = !{!"llvm.loop.unroll.enable"}
+; CHECK-CFG: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+;.
+; CHECK-UNROLL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-UNROLL: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK-UNROLL: [[META2]] = !{!"llvm.loop.unroll.enable"}
+;.

>From 6a80c39a1d52567be51b0075b91042936024a617 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Tue, 5 Dec 2023 16:47:03 +0800
Subject: [PATCH 2/2] [SimplifyCFG] Not folding branch in constant loops which
 expected unroll

Loops with a constant iteration count and an unroll hint are usually
expected to be unrolled by later passes; folding branches in the header of
such a loop in SimplifyCFG will break the unroll optimization.
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  57 ++
 .../SimplifyCFG/simplify-cfg-unroll.ll        | 851 +++++++++++++++++-
 2 files changed, 860 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c09cf9c2325c4..ca5a55d9cd17c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -3634,6 +3635,59 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
   }
 }
 
+/// Return true if \p TI has !llvm.loop metadata containing an unroll hint.
+static bool hasUnrollHint(Instruction *TI) {
+  MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+  if (!MD)
+    return false;
+  return GetUnrollMetadata(MD, "llvm.loop.unroll.enable") ||
+         GetUnrollMetadata(MD, "llvm.loop.unroll.full") ||
+         GetUnrollMetadata(MD, "llvm.loop.unroll.count");
+}
+
+// Return true if PBI looks like the exit test of a loop that has a constant
+// iteration count and carries an unroll hint. In that case "I < ConstNum"
+// should not be folded with "Cond2":
+//   #pragma unroll
+//   for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
+//     if (Cond2)
+//       break;
+//     xxx loop body;
+//   }
+// Folding these conditional branches may break/affect loop unrolling.
+static bool isConstantLoopWithUnrollHint(BranchInst *PBI) {
+  auto *ICmp = dyn_cast<ICmpInst>(PBI->getCondition());
+  if (!ICmp)
+    return false;
+
+  // Require some constant operand greater than 1 (ConstNum > 1). Compare on
+  // the APInt directly: ConstantInt::getSExtValue() asserts when the value
+  // does not fit in 64 bits, which a wide (e.g. i128) constant may not.
+  bool HasBoundAboveOne = false;
+  for (const Use &U : ICmp->operands()) {
+    auto *C = dyn_cast<ConstantInt>(U.get());
+    if (C && C->getValue().sgt(1)) {
+      HasBoundAboveOne = true;
+      break;
+    }
+  }
+  if (!HasBoundAboveOne)
+    return false;
+
+  // Loop information has not been established yet, so a backedge is
+  // approximated as a predecessor of PBB that is laid out at or after PBB in
+  // the function and whose terminator carries an unroll hint.
+  BasicBlock *PBB = PBI->getParent();
+  for (BasicBlock &BB :
+       make_range(PBB->getIterator(), PBB->getParent()->end())) {
+    if (is_contained(predecessors(PBB), &BB) &&
+        hasUnrollHint(BB.getTerminator()))
+      return true;
+  }
+
+  return false;
+}
+
 /// Determine if the two branches share a common destination and deduce a glue
 /// that joins the branches' conditions to arrive at the common destination if
 /// that would be profitable.
@@ -3645,6 +3699,9 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
   assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
          "PredBB must be a predecessor of BB.");
 
+  if (isConstantLoopWithUnrollHint(PBI))
+    return std::nullopt;
+
   // We have the potential to fold the conditions together, but if the
   // predecessor branch is predictable, we may not want to merge them.
   uint64_t PTWeight, PFWeight;
diff --git a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
index f442da7782138..a3e316ebec0cd 100644
--- a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
+++ b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
@@ -10,10 +10,11 @@ define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr nou
 ; CHECK-CFG:       for.cond:
 ; CHECK-CFG-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
 ; CHECK-CFG-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
-; CHECK-CFG-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[CLEANUP:%.*]]
+; CHECK-CFG:       for.body:
 ; CHECK-CFG-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
-; CHECK-CFG-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-CFG-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-CFG-NEXT:    br i1 [[CMP1]], label [[CLEANUP]], label [[IF_END:%.*]]
 ; CHECK-CFG:       if.end:
 ; CHECK-CFG-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
 ; CHECK-CFG-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
@@ -50,72 +51,830 @@ define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr nou
 ; CHECK-UNROLL-NEXT:  entry:
 ; CHECK-UNROLL-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK-UNROLL:       for.cond:
-; CHECK-UNROLL-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
-; CHECK-UNROLL-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
-; CHECK-UNROLL-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
-; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
-; CHECK-UNROLL-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-UNROLL-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-UNROLL:       for.body:
+; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 0, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
 ; CHECK-UNROLL:       if.end:
-; CHECK-UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
-; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
-; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
 ; CHECK-UNROLL-NEXT:    br label [[FOR_COND4:%.*]]
 ; CHECK-UNROLL:       for.cond4:
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7:%.*]]
 ; CHECK-UNROLL:       for.cond.cleanup6:
-; CHECK-UNROLL-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
-; CHECK-UNROLL-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_1:%.*]]
+; CHECK-UNROLL:       for.body.1:
+; CHECK-UNROLL-NEXT:    [[CMP1_1:%.*]] = icmp eq i32 1, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+; CHECK-UNROLL:       if.end.1:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_1:%.*]]
+; CHECK-UNROLL:       for.cond4.1:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_12:%.*]]
+; CHECK-UNROLL:       for.body7.12:
+; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_11:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_11]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_1:%.*]]
+; CHECK-UNROLL:       for.body7.1.1:
+; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_1:%.*]]
+; CHECK-UNROLL:       for.body7.2.1:
+; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_1:%.*]]
+; CHECK-UNROLL:       for.body7.3.1:
+; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_1:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6_1:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.1:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_2:%.*]]
+; CHECK-UNROLL:       for.body.2:
+; CHECK-UNROLL-NEXT:    [[CMP1_2:%.*]] = icmp eq i32 2, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+; CHECK-UNROLL:       if.end.2:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_2:%.*]]
+; CHECK-UNROLL:       for.cond4.2:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_24:%.*]]
+; CHECK-UNROLL:       for.body7.24:
+; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_23:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_23]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_2:%.*]]
+; CHECK-UNROLL:       for.body7.1.2:
+; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_2:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_2:%.*]]
+; CHECK-UNROLL:       for.body7.2.2:
+; CHECK-UNROLL-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_2:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_2:%.*]]
+; CHECK-UNROLL:       for.body7.3.2:
+; CHECK-UNROLL-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_2:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_2:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.2:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_3:%.*]]
+; CHECK-UNROLL:       for.body.3:
+; CHECK-UNROLL-NEXT:    [[CMP1_3:%.*]] = icmp eq i32 3, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+; CHECK-UNROLL:       if.end.3:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_3:%.*]]
+; CHECK-UNROLL:       for.cond4.3:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_36:%.*]]
+; CHECK-UNROLL:       for.body7.36:
+; CHECK-UNROLL-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_35:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_35]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_3:%.*]]
+; CHECK-UNROLL:       for.body7.1.3:
+; CHECK-UNROLL-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_3:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_3:%.*]]
+; CHECK-UNROLL:       for.body7.2.3:
+; CHECK-UNROLL-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_3:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_3:%.*]]
+; CHECK-UNROLL:       for.body7.3.3:
+; CHECK-UNROLL-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_3:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_3:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.3:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_4:%.*]]
+; CHECK-UNROLL:       for.body.4:
+; CHECK-UNROLL-NEXT:    [[CMP1_4:%.*]] = icmp eq i32 4, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+; CHECK-UNROLL:       if.end.4:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_4:%.*]]
+; CHECK-UNROLL:       for.cond4.4:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_48:%.*]]
+; CHECK-UNROLL:       for.body7.48:
+; CHECK-UNROLL-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_47:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_47]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_4:%.*]]
+; CHECK-UNROLL:       for.body7.1.4:
+; CHECK-UNROLL-NEXT:    [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_4:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_4:%.*]]
+; CHECK-UNROLL:       for.body7.2.4:
+; CHECK-UNROLL-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_4:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_4:%.*]]
+; CHECK-UNROLL:       for.body7.3.4:
+; CHECK-UNROLL-NEXT:    [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_4:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_4:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.4:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_5:%.*]]
+; CHECK-UNROLL:       for.body.5:
+; CHECK-UNROLL-NEXT:    [[CMP1_5:%.*]] = icmp eq i32 5, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+; CHECK-UNROLL:       if.end.5:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_5:%.*]]
+; CHECK-UNROLL:       for.cond4.5:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_5:%.*]]
+; CHECK-UNROLL:       for.body7.5:
+; CHECK-UNROLL-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_5:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_5]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_5:%.*]]
+; CHECK-UNROLL:       for.body7.1.5:
+; CHECK-UNROLL-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_5:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_5:%.*]]
+; CHECK-UNROLL:       for.body7.2.5:
+; CHECK-UNROLL-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_5:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_5:%.*]]
+; CHECK-UNROLL:       for.body7.3.5:
+; CHECK-UNROLL-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_5:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_5:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.5:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_6:%.*]]
+; CHECK-UNROLL:       for.body.6:
+; CHECK-UNROLL-NEXT:    [[CMP1_6:%.*]] = icmp eq i32 6, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+; CHECK-UNROLL:       if.end.6:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_6:%.*]]
+; CHECK-UNROLL:       for.cond4.6:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_6:%.*]]
+; CHECK-UNROLL:       for.body7.6:
+; CHECK-UNROLL-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_6:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_6]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_6:%.*]]
+; CHECK-UNROLL:       for.body7.1.6:
+; CHECK-UNROLL-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_6:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_6:%.*]]
+; CHECK-UNROLL:       for.body7.2.6:
+; CHECK-UNROLL-NEXT:    [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_6:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_6:%.*]]
+; CHECK-UNROLL:       for.body7.3.6:
+; CHECK-UNROLL-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_6:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_6:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.6:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_7:%.*]]
+; CHECK-UNROLL:       for.body.7:
+; CHECK-UNROLL-NEXT:    [[CMP1_7:%.*]] = icmp eq i32 7, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+; CHECK-UNROLL:       if.end.7:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_7:%.*]]
+; CHECK-UNROLL:       for.cond4.7:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_7:%.*]]
+; CHECK-UNROLL:       for.body7.7:
+; CHECK-UNROLL-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_7:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_7]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_7:%.*]]
+; CHECK-UNROLL:       for.body7.1.7:
+; CHECK-UNROLL-NEXT:    [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_7:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_7:%.*]]
+; CHECK-UNROLL:       for.body7.2.7:
+; CHECK-UNROLL-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_7:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_7:%.*]]
+; CHECK-UNROLL:       for.body7.3.7:
+; CHECK-UNROLL-NEXT:    [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_7:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_7:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.7:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_8:%.*]]
+; CHECK-UNROLL:       for.body.8:
+; CHECK-UNROLL-NEXT:    [[CMP1_8:%.*]] = icmp eq i32 8, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+; CHECK-UNROLL:       if.end.8:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_8:%.*]]
+; CHECK-UNROLL:       for.cond4.8:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_8:%.*]]
+; CHECK-UNROLL:       for.body7.8:
+; CHECK-UNROLL-NEXT:    [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_8:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_8]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_8:%.*]]
+; CHECK-UNROLL:       for.body7.1.8:
+; CHECK-UNROLL-NEXT:    [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_8:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_8:%.*]]
+; CHECK-UNROLL:       for.body7.2.8:
+; CHECK-UNROLL-NEXT:    [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_8:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_8:%.*]]
+; CHECK-UNROLL:       for.body7.3.8:
+; CHECK-UNROLL-NEXT:    [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_8:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_8:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.8:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_9:%.*]]
+; CHECK-UNROLL:       for.body.9:
+; CHECK-UNROLL-NEXT:    [[CMP1_9:%.*]] = icmp eq i32 9, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+; CHECK-UNROLL:       if.end.9:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_9:%.*]]
+; CHECK-UNROLL:       for.cond4.9:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_9:%.*]]
+; CHECK-UNROLL:       for.body7.9:
+; CHECK-UNROLL-NEXT:    [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_9:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_9]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_9:%.*]]
+; CHECK-UNROLL:       for.body7.1.9:
+; CHECK-UNROLL-NEXT:    [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_9:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_9:%.*]]
+; CHECK-UNROLL:       for.body7.2.9:
+; CHECK-UNROLL-NEXT:    [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_9:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_9:%.*]]
+; CHECK-UNROLL:       for.body7.3.9:
+; CHECK-UNROLL-NEXT:    [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_9:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_9:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.9:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_10:%.*]]
+; CHECK-UNROLL:       for.body.10:
+; CHECK-UNROLL-NEXT:    [[CMP1_10:%.*]] = icmp eq i32 10, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+; CHECK-UNROLL:       if.end.10:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_10:%.*]]
+; CHECK-UNROLL:       for.cond4.10:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_10:%.*]]
+; CHECK-UNROLL:       for.body7.10:
+; CHECK-UNROLL-NEXT:    [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_10:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_10]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_10:%.*]]
+; CHECK-UNROLL:       for.body7.1.10:
+; CHECK-UNROLL-NEXT:    [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_10:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_10:%.*]]
+; CHECK-UNROLL:       for.body7.2.10:
+; CHECK-UNROLL-NEXT:    [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_10:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_10:%.*]]
+; CHECK-UNROLL:       for.body7.3.10:
+; CHECK-UNROLL-NEXT:    [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_10:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_10:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.10:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_11:%.*]]
+; CHECK-UNROLL:       for.body.11:
+; CHECK-UNROLL-NEXT:    [[CMP1_11:%.*]] = icmp eq i32 11, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+; CHECK-UNROLL:       if.end.11:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_11:%.*]]
+; CHECK-UNROLL:       for.cond4.11:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_11:%.*]]
+; CHECK-UNROLL:       for.body7.11:
+; CHECK-UNROLL-NEXT:    [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_119:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_119]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_11:%.*]]
+; CHECK-UNROLL:       for.body7.1.11:
+; CHECK-UNROLL-NEXT:    [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_11:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_11:%.*]]
+; CHECK-UNROLL:       for.body7.2.11:
+; CHECK-UNROLL-NEXT:    [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_11:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_11:%.*]]
+; CHECK-UNROLL:       for.body7.3.11:
+; CHECK-UNROLL-NEXT:    [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_11:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_11:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.11:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_12:%.*]]
+; CHECK-UNROLL:       for.body.12:
+; CHECK-UNROLL-NEXT:    [[CMP1_12:%.*]] = icmp eq i32 12, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+; CHECK-UNROLL:       if.end.12:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_12:%.*]]
+; CHECK-UNROLL:       for.cond4.12:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1210:%.*]]
+; CHECK-UNROLL:       for.body7.1210:
+; CHECK-UNROLL-NEXT:    [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_12:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_12]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_12:%.*]]
+; CHECK-UNROLL:       for.body7.1.12:
+; CHECK-UNROLL-NEXT:    [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_12:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_12:%.*]]
+; CHECK-UNROLL:       for.body7.2.12:
+; CHECK-UNROLL-NEXT:    [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_12:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_12:%.*]]
+; CHECK-UNROLL:       for.body7.3.12:
+; CHECK-UNROLL-NEXT:    [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_12:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_12:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.12:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_13:%.*]]
+; CHECK-UNROLL:       for.body.13:
+; CHECK-UNROLL-NEXT:    [[CMP1_13:%.*]] = icmp eq i32 13, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+; CHECK-UNROLL:       if.end.13:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_13:%.*]]
+; CHECK-UNROLL:       for.cond4.13:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_13:%.*]]
+; CHECK-UNROLL:       for.body7.13:
+; CHECK-UNROLL-NEXT:    [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_13:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_13]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_13:%.*]]
+; CHECK-UNROLL:       for.body7.1.13:
+; CHECK-UNROLL-NEXT:    [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_13:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_13:%.*]]
+; CHECK-UNROLL:       for.body7.2.13:
+; CHECK-UNROLL-NEXT:    [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_13:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_13:%.*]]
+; CHECK-UNROLL:       for.body7.3.13:
+; CHECK-UNROLL-NEXT:    [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_13:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_13:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.13:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_14:%.*]]
+; CHECK-UNROLL:       for.body.14:
+; CHECK-UNROLL-NEXT:    [[CMP1_14:%.*]] = icmp eq i32 14, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+; CHECK-UNROLL:       if.end.14:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_14:%.*]]
+; CHECK-UNROLL:       for.cond4.14:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_14:%.*]]
+; CHECK-UNROLL:       for.body7.14:
+; CHECK-UNROLL-NEXT:    [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_14:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_14:%.*]]
+; CHECK-UNROLL:       for.body7.1.14:
+; CHECK-UNROLL-NEXT:    [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_14:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_14:%.*]]
+; CHECK-UNROLL:       for.body7.2.14:
+; CHECK-UNROLL-NEXT:    [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_14:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_14:%.*]]
+; CHECK-UNROLL:       for.body7.3.14:
+; CHECK-UNROLL-NEXT:    [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_14:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_14:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.14:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY_15:%.*]]
+; CHECK-UNROLL:       for.body.15:
+; CHECK-UNROLL-NEXT:    [[CMP1_15:%.*]] = icmp eq i32 15, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+; CHECK-UNROLL:       if.end.15:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_15:%.*]]
+; CHECK-UNROLL:       for.cond4.15:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_15:%.*]]
+; CHECK-UNROLL:       for.body7.15:
+; CHECK-UNROLL-NEXT:    [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_15:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_15]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_15:%.*]]
+; CHECK-UNROLL:       for.body7.1.15:
+; CHECK-UNROLL-NEXT:    [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_15:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_15:%.*]]
+; CHECK-UNROLL:       for.body7.2.15:
+; CHECK-UNROLL-NEXT:    [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_15:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_15:%.*]]
+; CHECK-UNROLL:       for.body7.3.15:
+; CHECK-UNROLL-NEXT:    [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_15:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_15:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.15:
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY_16:%.*]], label [[CLEANUP]]
+; CHECK-UNROLL:       for.body.16:
+; CHECK-UNROLL-NEXT:    [[CMP1_16:%.*]] = icmp eq i32 16, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_16]], label [[CLEANUP]], label [[IF_END_16:%.*]]
+; CHECK-UNROLL:       if.end.16:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 16
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_16:%.*]]
+; CHECK-UNROLL:       for.cond4.16:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_16:%.*]]
+; CHECK-UNROLL:       for.body7.16:
+; CHECK-UNROLL-NEXT:    [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_16:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_16]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_16:%.*]]
+; CHECK-UNROLL:       for.body7.1.16:
+; CHECK-UNROLL-NEXT:    [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_16:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_16:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_16]], ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_16:%.*]]
+; CHECK-UNROLL:       for.body7.2.16:
+; CHECK-UNROLL-NEXT:    [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_16:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_16:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_16]], ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_16:%.*]]
+; CHECK-UNROLL:       for.body7.3.16:
+; CHECK-UNROLL-NEXT:    [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_16:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_16:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_16]], ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_16:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.16:
+; CHECK-UNROLL-NEXT:    unreachable
 ; CHECK-UNROLL:       for.body7:
-; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-UNROLL-NEXT:    [[TMP192:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP194:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP194]], [[TMP193]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1:%.*]]
 ; CHECK-UNROLL:       for.body7.1:
-; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP195:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP195]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP196:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
-; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-UNROLL-NEXT:    [[TMP197:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP197]], [[TMP196]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2:%.*]]
 ; CHECK-UNROLL:       for.body7.2:
-; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
-; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP198:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP198]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP199:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
-; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-UNROLL-NEXT:    [[TMP200:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP200]], [[TMP199]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3:%.*]]
 ; CHECK-UNROLL:       for.body7.3:
-; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
-; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP201:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP201]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP202:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
-; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-UNROLL-NEXT:    [[TMP203:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP203]], [[TMP202]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
-; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6]]
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6:%.*]]
 ; CHECK-UNROLL:       for.body7.4:
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARRAYIDX]], [[FOR_BODY7_3]] ]
-; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 4
-; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARR]], [[FOR_BODY7_3]] ], [ [[ARRAYIDX_1]], [[FOR_BODY7_3_1]] ], [ [[ARRAYIDX_2]], [[FOR_BODY7_3_2]] ], [ [[ARRAYIDX_3]], [[FOR_BODY7_3_3]] ], [ [[ARRAYIDX_4]], [[FOR_BODY7_3_4]] ], [ [[ARRAYIDX_5]], [[FOR_BODY7_3_5]] ], [ [[ARRAYIDX_6]], [[FOR_BODY7_3_6]] ], [ [[ARRAYIDX_7]], [[FOR_BODY7_3_7]] ], [ [[ARRAYIDX_8]], [[FOR_BODY7_3_8]] ], [ [[ARRAYIDX_9]], [[FOR_BODY7_3_9]] ], [ [[ARRAYIDX_10]], [[FOR_BODY7_3_10]] ], [ [[ARRAYIDX_11]], [[FOR_BODY7_3_11]] ], [ [[ARRAYIDX_12]], [[FOR_BODY7_3_12]] ], [ [[ARRAYIDX_13]], [[FOR_BODY7_3_13]] ], [ [[ARRAYIDX_14]], [[FOR_BODY7_3_14]] ], [ [[ARRAYIDX_15]], [[FOR_BODY7_3_15]] ], [ [[ARRAYIDX_16]], [[FOR_BODY7_3_16]] ]
+; CHECK-UNROLL-NEXT:    [[TMP204:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP204]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP205:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
-; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-UNROLL-NEXT:    [[TMP206:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP206]], [[TMP205]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    unreachable
@@ -192,7 +951,3 @@ declare void @_Z3barv()
 ; CHECK-CFG: [[META2]] = !{!"llvm.loop.unroll.enable"}
 ; CHECK-CFG: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
 ;.
-; CHECK-UNROLL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-UNROLL: [[META1]] = !{!"llvm.loop.mustprogress"}
-; CHECK-UNROLL: [[META2]] = !{!"llvm.loop.unroll.enable"}
-;.



More information about the llvm-commits mailing list