[llvm] Users/xiangzhangllvm/loop unroll folding branches (PR #74703)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 7 00:39:24 PST 2023


https://github.com/xiangzh1 created https://github.com/llvm/llvm-project/pull/74703

**Background:**
This is an extension of [74268](https://github.com/llvm/llvm-project/pull/74268). In [74268](https://github.com/llvm/llvm-project/pull/74268) we tried to fix a loop-unroll failure in SimplifyCFG.

SimplifyCFG folds loop branches, which then causes loop unrolling to fail for "#pragma unroll" loops.
For example:
```
#pragma unroll
for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
  if (Cond2) {
  break;
  }
  xxx loop body;
}
```

Because fixing this in SimplifyCFG would miss branch-folding optimizations, we instead try to fix it at the loop-unroll stage, as @nikic suggested.
The pragma unroll metadata only takes effect if there is an exact trip count, but not if there is an upper bound trip count. This patch makes it work with an upper bound trip count as well in shouldPragmaUnroll().

**Why we do this**:
Loop unrolling is important on devices that are sensitive to stack usage (e.g. GPUs, which is why a lot of GPU code marks loops with "#pragma unroll").
It usually greatly simplifies the address (offset) calculations of the original loop iterations, which then enables many other optimizations, e.g. SROA, on these simplified addresses (avoiding an alloca for the whole aggregate).

>From 4936c4e65fef505f7566a79d6544f89a6c4677ec Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Thu, 7 Dec 2023 15:38:25 +0800
Subject: [PATCH 1/2] [LoopUnroll] Pre-commit test for loop unroll after
 folding branches in simplify cfg

---
 .../SimplifyCFG/simplify-cfg-unroll.ll        | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
new file mode 100644
index 0000000000000..c7b63ac5e9688
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s --check-prefixes=CHECK-CFG
+; RUN: opt < %s -S -passes=simplifycfg,loop-unroll --unroll-max-upperbound=17 | FileCheck %s --check-prefixes=CHECK-UNROLL
+
+define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr noundef %Out) {
+; CHECK-CFG-LABEL: define void @func(
+; CHECK-CFG-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-CFG-NEXT:  entry:
+; CHECK-CFG-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK-CFG:       for.cond:
+; CHECK-CFG-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
+; CHECK-CFG-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
+; CHECK-CFG-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
+; CHECK-CFG-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-CFG-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-CFG:       if.end:
+; CHECK-CFG-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
+; CHECK-CFG-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
+; CHECK-CFG-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-CFG-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND4:%.*]]
+; CHECK-CFG:       for.cond4:
+; CHECK-CFG-NEXT:    [[ARG_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY7:%.*]] ]
+; CHECK-CFG-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[ARG_0]], 4
+; CHECK-CFG-NEXT:    br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6]]
+; CHECK-CFG:       for.cond.cleanup6:
+; CHECK-CFG-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-CFG:       for.body7:
+; CHECK-CFG-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT:    [[IDXPROM10:%.*]] = sext i32 [[ARG_0]] to i64
+; CHECK-CFG-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
+; CHECK-CFG-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-CFG-NEXT:    store i32 [[ADD14]], ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT:    call void @_Z3barv()
+; CHECK-CFG-NEXT:    [[INC]] = add nsw i32 [[ARG_0]], 1
+; CHECK-CFG-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-CFG:       cleanup:
+; CHECK-CFG-NEXT:    ret void
+;
+; CHECK-UNROLL-LABEL: define void @func(
+; CHECK-UNROLL-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-UNROLL-NEXT:  entry:
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK-UNROLL:       for.cond:
+; CHECK-UNROLL-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
+; CHECK-UNROLL-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
+; CHECK-UNROLL-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
+; CHECK-UNROLL-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-UNROLL-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL:       if.end:
+; CHECK-UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
+; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
+; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4:%.*]]
+; CHECK-UNROLL:       for.cond4:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6:
+; CHECK-UNROLL-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UNROLL:       for.body7:
+; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1:%.*]]
+; CHECK-UNROLL:       for.body7.1:
+; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2:%.*]]
+; CHECK-UNROLL:       for.body7.2:
+; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3:%.*]]
+; CHECK-UNROLL:       for.body7.3:
+; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6]]
+; CHECK-UNROLL:       for.body7.4:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARRAYIDX]], [[FOR_BODY7_3]] ]
+; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    unreachable
+; CHECK-UNROLL:       cleanup:
+; CHECK-UNROLL-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.cleanup6, %entry
+  %Dim.0 = phi i32 [ 0, %entry ], [ %inc16, %for.cond.cleanup6 ]
+  %Idx.addr.0 = phi i32 [ %Idx, %entry ], [ %add, %for.cond.cleanup6 ]
+  %cmp = icmp slt i32 %Dim.0, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %cleanup
+
+for.body:                                         ; preds = %for.cond
+  %cmp1 = icmp eq i32 %Dim.0, %Dims
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  br label %cleanup
+
+if.end:                                           ; preds = %for.body
+  %idxprom = sext i32 %Dim.0 to i64
+  %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %idxprom
+  %0 = load ptr, ptr %arrayidx, align 8
+  %idxprom2 = sext i32 %Idx.addr.0 to i64
+  %arrayidx3 = getelementptr inbounds i32, ptr %0, i64 %idxprom2
+  %1 = load i32, ptr %arrayidx3, align 4
+  %add = add nsw i32 %1, 1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %for.body7, %if.end
+  %arg.0 = phi i32 [ 0, %if.end ], [ %inc, %for.body7 ]
+  %cmp5 = icmp slt i32 %arg.0, 4
+  br i1 %cmp5, label %for.body7, label %for.cond.cleanup6
+
+for.cond.cleanup6:                                ; preds = %for.cond4
+  %inc16 = add nsw i32 %Dim.0, 1
+  br label %for.cond, !llvm.loop !0
+
+for.body7:                                        ; preds = %for.cond4
+  %2 = load ptr, ptr %arrayidx, align 8
+  %idxprom10 = sext i32 %arg.0 to i64
+  %arrayidx11 = getelementptr inbounds i32, ptr %2, i64 %idxprom10
+  %3 = load i32, ptr %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %Out, i64 %idxprom10
+  %4 = load i32, ptr %arrayidx13, align 4
+  %add14 = add nsw i32 %4, %3
+  store i32 %add14, ptr %arrayidx13, align 4
+  call void @_Z3barv()
+  %inc = add nsw i32 %arg.0, 1
+  br label %for.cond4, !llvm.loop !3
+
+cleanup:                                          ; preds = %if.then, %for.cond.cleanup
+  ret void
+}
+
+  declare void @_Z3barv()
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.enable"}
+!3 = distinct !{!3, !1}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.enable"}
+;.
+; CHECK-CFG: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-CFG: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK-CFG: [[META2]] = !{!"llvm.loop.unroll.enable"}
+; CHECK-CFG: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+;.
+; CHECK-UNROLL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-UNROLL: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK-UNROLL: [[META2]] = !{!"llvm.loop.unroll.enable"}
+;.

>From 675e3cbd1f3645556465d9ecdfa40bef3fad1c62 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Thu, 7 Dec 2023 15:46:04 +0800
Subject: [PATCH 2/2] [LoopUnroll] unroll small loops with 'pragma unroll' when
 its trip count is destroyed by previous optimization

---
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |   9 +-
 .../SimplifyCFG/simplify-cfg-unroll.ll        | 831 +++++++++++++++++-
 2 files changed, 793 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 446aa497026d3..963f97d796ae7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -755,7 +755,7 @@ static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
 static std::optional<unsigned>
 shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
                    const unsigned TripMultiple, const unsigned TripCount,
-                   const UnrollCostEstimator UCE,
+                   unsigned MaxTripCount, const UnrollCostEstimator UCE,
                    const TargetTransformInfo::UnrollingPreferences &UP) {
 
   // Using unroll pragma
@@ -776,6 +776,11 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
   if (PInfo.PragmaFullUnroll && TripCount != 0)
     return TripCount;
 
+  // Small MaxTripCount is clearly calculated with "pragma unroll".
+  if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
+      MaxTripCount <= UnrollMaxUpperBound)
+    return MaxTripCount;
+
   // if didn't return until here, should continue to other priorties
   return std::nullopt;
 }
@@ -902,7 +907,7 @@ bool llvm::computeUnrollCount(
   // 1st priority is unroll count set by "unroll-count" option.
   // 2nd priority is unroll count set by pragma.
   if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
-                                             UCE, UP)) {
+                                             MaxTripCount, UCE, UP)) {
     UP.Count = *UnrollFactor;
 
     if (UserUnrollCount || (PragmaCount > 0)) {
diff --git a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
index c7b63ac5e9688..9f7a95637c064 100644
--- a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
+++ b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
@@ -2,6 +2,28 @@
 ; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s --check-prefixes=CHECK-CFG
 ; RUN: opt < %s -S -passes=simplifycfg,loop-unroll --unroll-max-upperbound=17 | FileCheck %s --check-prefixes=CHECK-UNROLL
 
+; This test is designed to check:
+; We can still unroll a loop with 'pragma unroll' if its loop count (trip count) was destroyed by a previous optimization.
+; For example, in the following test, the loop condition "Dim < 16" was 'merged' with "Dim == Dims" when folding branches
+; in simplifycfg. But if the customer marks the loop with "#pragma unroll", we can still successfully unroll it under
+; unroll-max-upperbound.
+;
+; __device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
+;   #pragma unroll
+;   for (int Dim = 0; Dim < 16; ++Dim) {
+;     if (Dim == Dims) {
+;       break;
+;     }
+;     int divmod = Arr[Dim][Idx];
+;     Idx = divmod + 1;
+;
+;     for (int arg = 0; arg < 4; arg++) {
+;       Out[arg] += Arr[Dim][arg];
+;       bar();
+;     }
+;   }
+; }
+
 define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr noundef %Out) {
 ; CHECK-CFG-LABEL: define void @func(
 ; CHECK-CFG-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
@@ -50,72 +72,795 @@ define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr nou
 ; CHECK-UNROLL-NEXT:  entry:
 ; CHECK-UNROLL-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK-UNROLL:       for.cond:
-; CHECK-UNROLL-NEXT:    [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
-; CHECK-UNROLL-NEXT:    [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
-; CHECK-UNROLL-NEXT:    [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
-; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
-; CHECK-UNROLL-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-UNROLL-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL-NEXT:    [[CMP1:%.*]] = icmp eq i32 0, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
 ; CHECK-UNROLL:       if.end:
-; CHECK-UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
-; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
-; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP1]], 1
 ; CHECK-UNROLL-NEXT:    br label [[FOR_COND4:%.*]]
 ; CHECK-UNROLL:       for.cond4:
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7:%.*]]
 ; CHECK-UNROLL:       for.cond.cleanup6:
-; CHECK-UNROLL-NEXT:    [[INC16]] = add nsw i32 [[DIM_0]], 1
-; CHECK-UNROLL-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UNROLL-NEXT:    [[CMP1_1:%.*]] = icmp eq i32 1, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+; CHECK-UNROLL:       if.end.1:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_1:%.*]]
+; CHECK-UNROLL:       for.cond4.1:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_12:%.*]]
+; CHECK-UNROLL:       for.body7.12:
+; CHECK-UNROLL-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_11:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_11]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_1:%.*]]
+; CHECK-UNROLL:       for.body7.1.1:
+; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_1:%.*]]
+; CHECK-UNROLL:       for.body7.2.1:
+; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_1:%.*]]
+; CHECK-UNROLL:       for.body7.3.1:
+; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_1:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6_1:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.1:
+; CHECK-UNROLL-NEXT:    [[CMP1_2:%.*]] = icmp eq i32 2, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+; CHECK-UNROLL:       if.end.2:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_2:%.*]]
+; CHECK-UNROLL:       for.cond4.2:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_24:%.*]]
+; CHECK-UNROLL:       for.body7.24:
+; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_23:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_23]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_2:%.*]]
+; CHECK-UNROLL:       for.body7.1.2:
+; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_2:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_2:%.*]]
+; CHECK-UNROLL:       for.body7.2.2:
+; CHECK-UNROLL-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_2:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_2:%.*]]
+; CHECK-UNROLL:       for.body7.3.2:
+; CHECK-UNROLL-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_2:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_2:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.2:
+; CHECK-UNROLL-NEXT:    [[CMP1_3:%.*]] = icmp eq i32 3, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+; CHECK-UNROLL:       if.end.3:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_3:%.*]]
+; CHECK-UNROLL:       for.cond4.3:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_36:%.*]]
+; CHECK-UNROLL:       for.body7.36:
+; CHECK-UNROLL-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_35:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_35]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_3:%.*]]
+; CHECK-UNROLL:       for.body7.1.3:
+; CHECK-UNROLL-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_3:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_3:%.*]]
+; CHECK-UNROLL:       for.body7.2.3:
+; CHECK-UNROLL-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_3:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_3:%.*]]
+; CHECK-UNROLL:       for.body7.3.3:
+; CHECK-UNROLL-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_3:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_3:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.3:
+; CHECK-UNROLL-NEXT:    [[CMP1_4:%.*]] = icmp eq i32 4, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+; CHECK-UNROLL:       if.end.4:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_4:%.*]]
+; CHECK-UNROLL:       for.cond4.4:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_48:%.*]]
+; CHECK-UNROLL:       for.body7.48:
+; CHECK-UNROLL-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_47:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_47]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_4:%.*]]
+; CHECK-UNROLL:       for.body7.1.4:
+; CHECK-UNROLL-NEXT:    [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_4:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_4:%.*]]
+; CHECK-UNROLL:       for.body7.2.4:
+; CHECK-UNROLL-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_4:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_4:%.*]]
+; CHECK-UNROLL:       for.body7.3.4:
+; CHECK-UNROLL-NEXT:    [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_4:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_4:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.4:
+; CHECK-UNROLL-NEXT:    [[CMP1_5:%.*]] = icmp eq i32 5, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+; CHECK-UNROLL:       if.end.5:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_5:%.*]]
+; CHECK-UNROLL:       for.cond4.5:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_5:%.*]]
+; CHECK-UNROLL:       for.body7.5:
+; CHECK-UNROLL-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_5:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_5]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_5:%.*]]
+; CHECK-UNROLL:       for.body7.1.5:
+; CHECK-UNROLL-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_5:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_5:%.*]]
+; CHECK-UNROLL:       for.body7.2.5:
+; CHECK-UNROLL-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_5:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_5:%.*]]
+; CHECK-UNROLL:       for.body7.3.5:
+; CHECK-UNROLL-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_5:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_5:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.5:
+; CHECK-UNROLL-NEXT:    [[CMP1_6:%.*]] = icmp eq i32 6, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+; CHECK-UNROLL:       if.end.6:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_6:%.*]]
+; CHECK-UNROLL:       for.cond4.6:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_6:%.*]]
+; CHECK-UNROLL:       for.body7.6:
+; CHECK-UNROLL-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_6:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_6]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_6:%.*]]
+; CHECK-UNROLL:       for.body7.1.6:
+; CHECK-UNROLL-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_6:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_6:%.*]]
+; CHECK-UNROLL:       for.body7.2.6:
+; CHECK-UNROLL-NEXT:    [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_6:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_6:%.*]]
+; CHECK-UNROLL:       for.body7.3.6:
+; CHECK-UNROLL-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_6:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_6:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.6:
+; CHECK-UNROLL-NEXT:    [[CMP1_7:%.*]] = icmp eq i32 7, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+; CHECK-UNROLL:       if.end.7:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_7:%.*]]
+; CHECK-UNROLL:       for.cond4.7:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_7:%.*]]
+; CHECK-UNROLL:       for.body7.7:
+; CHECK-UNROLL-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_7:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_7]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_7:%.*]]
+; CHECK-UNROLL:       for.body7.1.7:
+; CHECK-UNROLL-NEXT:    [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_7:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_7:%.*]]
+; CHECK-UNROLL:       for.body7.2.7:
+; CHECK-UNROLL-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_7:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_7:%.*]]
+; CHECK-UNROLL:       for.body7.3.7:
+; CHECK-UNROLL-NEXT:    [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_7:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_7:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.7:
+; CHECK-UNROLL-NEXT:    [[CMP1_8:%.*]] = icmp eq i32 8, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+; CHECK-UNROLL:       if.end.8:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_8:%.*]]
+; CHECK-UNROLL:       for.cond4.8:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_8:%.*]]
+; CHECK-UNROLL:       for.body7.8:
+; CHECK-UNROLL-NEXT:    [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_8:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_8]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_8:%.*]]
+; CHECK-UNROLL:       for.body7.1.8:
+; CHECK-UNROLL-NEXT:    [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_8:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_8:%.*]]
+; CHECK-UNROLL:       for.body7.2.8:
+; CHECK-UNROLL-NEXT:    [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_8:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_8:%.*]]
+; CHECK-UNROLL:       for.body7.3.8:
+; CHECK-UNROLL-NEXT:    [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_8:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_8:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.8:
+; CHECK-UNROLL-NEXT:    [[CMP1_9:%.*]] = icmp eq i32 9, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+; CHECK-UNROLL:       if.end.9:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_9:%.*]]
+; CHECK-UNROLL:       for.cond4.9:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_9:%.*]]
+; CHECK-UNROLL:       for.body7.9:
+; CHECK-UNROLL-NEXT:    [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_9:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_9]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_9:%.*]]
+; CHECK-UNROLL:       for.body7.1.9:
+; CHECK-UNROLL-NEXT:    [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_9:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_9:%.*]]
+; CHECK-UNROLL:       for.body7.2.9:
+; CHECK-UNROLL-NEXT:    [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_9:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_9:%.*]]
+; CHECK-UNROLL:       for.body7.3.9:
+; CHECK-UNROLL-NEXT:    [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_9:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_9:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.9:
+; CHECK-UNROLL-NEXT:    [[CMP1_10:%.*]] = icmp eq i32 10, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+; CHECK-UNROLL:       if.end.10:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_10:%.*]]
+; CHECK-UNROLL:       for.cond4.10:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_10:%.*]]
+; CHECK-UNROLL:       for.body7.10:
+; CHECK-UNROLL-NEXT:    [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_10:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_10]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_10:%.*]]
+; CHECK-UNROLL:       for.body7.1.10:
+; CHECK-UNROLL-NEXT:    [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_10:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_10:%.*]]
+; CHECK-UNROLL:       for.body7.2.10:
+; CHECK-UNROLL-NEXT:    [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_10:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_10:%.*]]
+; CHECK-UNROLL:       for.body7.3.10:
+; CHECK-UNROLL-NEXT:    [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_10:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_10:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.10:
+; CHECK-UNROLL-NEXT:    [[CMP1_11:%.*]] = icmp eq i32 11, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+; CHECK-UNROLL:       if.end.11:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_11:%.*]]
+; CHECK-UNROLL:       for.cond4.11:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_11:%.*]]
+; CHECK-UNROLL:       for.body7.11:
+; CHECK-UNROLL-NEXT:    [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_119:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_119]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_11:%.*]]
+; CHECK-UNROLL:       for.body7.1.11:
+; CHECK-UNROLL-NEXT:    [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_11:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_11:%.*]]
+; CHECK-UNROLL:       for.body7.2.11:
+; CHECK-UNROLL-NEXT:    [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_11:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_11:%.*]]
+; CHECK-UNROLL:       for.body7.3.11:
+; CHECK-UNROLL-NEXT:    [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_11:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_11:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.11:
+; CHECK-UNROLL-NEXT:    [[CMP1_12:%.*]] = icmp eq i32 12, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+; CHECK-UNROLL:       if.end.12:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_12:%.*]]
+; CHECK-UNROLL:       for.cond4.12:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1210:%.*]]
+; CHECK-UNROLL:       for.body7.1210:
+; CHECK-UNROLL-NEXT:    [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_12:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_12]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_12:%.*]]
+; CHECK-UNROLL:       for.body7.1.12:
+; CHECK-UNROLL-NEXT:    [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_12:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_12:%.*]]
+; CHECK-UNROLL:       for.body7.2.12:
+; CHECK-UNROLL-NEXT:    [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_12:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_12:%.*]]
+; CHECK-UNROLL:       for.body7.3.12:
+; CHECK-UNROLL-NEXT:    [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_12:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_12:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.12:
+; CHECK-UNROLL-NEXT:    [[CMP1_13:%.*]] = icmp eq i32 13, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+; CHECK-UNROLL:       if.end.13:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_13:%.*]]
+; CHECK-UNROLL:       for.cond4.13:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_13:%.*]]
+; CHECK-UNROLL:       for.body7.13:
+; CHECK-UNROLL-NEXT:    [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_13:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_13]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_13:%.*]]
+; CHECK-UNROLL:       for.body7.1.13:
+; CHECK-UNROLL-NEXT:    [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_13:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_13:%.*]]
+; CHECK-UNROLL:       for.body7.2.13:
+; CHECK-UNROLL-NEXT:    [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_13:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_13:%.*]]
+; CHECK-UNROLL:       for.body7.3.13:
+; CHECK-UNROLL-NEXT:    [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_13:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_13:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.13:
+; CHECK-UNROLL-NEXT:    [[CMP1_14:%.*]] = icmp eq i32 14, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+; CHECK-UNROLL:       if.end.14:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_14:%.*]]
+; CHECK-UNROLL:       for.cond4.14:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_14:%.*]]
+; CHECK-UNROLL:       for.body7.14:
+; CHECK-UNROLL-NEXT:    [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_14:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_14:%.*]]
+; CHECK-UNROLL:       for.body7.1.14:
+; CHECK-UNROLL-NEXT:    [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_14:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_14:%.*]]
+; CHECK-UNROLL:       for.body7.2.14:
+; CHECK-UNROLL-NEXT:    [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_14:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_14:%.*]]
+; CHECK-UNROLL:       for.body7.3.14:
+; CHECK-UNROLL-NEXT:    [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_14:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_14:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.14:
+; CHECK-UNROLL-NEXT:    [[CMP1_15:%.*]] = icmp eq i32 15, [[DIMS]]
+; CHECK-UNROLL-NEXT:    br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+; CHECK-UNROLL:       if.end.15:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_15:%.*]]
+; CHECK-UNROLL:       for.cond4.15:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_15:%.*]]
+; CHECK-UNROLL:       for.body7.15:
+; CHECK-UNROLL-NEXT:    [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_15:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_15]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_15:%.*]]
+; CHECK-UNROLL:       for.body7.1.15:
+; CHECK-UNROLL-NEXT:    [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_15:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_15:%.*]]
+; CHECK-UNROLL:       for.body7.2.15:
+; CHECK-UNROLL-NEXT:    [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_15:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_15:%.*]]
+; CHECK-UNROLL:       for.body7.3.15:
+; CHECK-UNROLL-NEXT:    [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_15:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_15:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.15:
+; CHECK-UNROLL-NEXT:    br i1 true, label [[CLEANUP]], label [[IF_END_16:%.*]]
+; CHECK-UNROLL:       if.end.16:
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 16
+; CHECK-UNROLL-NEXT:    br label [[FOR_COND4_16:%.*]]
+; CHECK-UNROLL:       for.cond4.16:
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_16:%.*]]
+; CHECK-UNROLL:       for.body7.16:
+; CHECK-UNROLL-NEXT:    [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_16:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_16]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1_16:%.*]]
+; CHECK-UNROLL:       for.body7.1.16:
+; CHECK-UNROLL-NEXT:    [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1_16:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1_16:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1_16]], ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2_16:%.*]]
+; CHECK-UNROLL:       for.body7.2.16:
+; CHECK-UNROLL-NEXT:    [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2_16:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2_16:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2_16]], ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3_16:%.*]]
+; CHECK-UNROLL:       for.body7.3.16:
+; CHECK-UNROLL-NEXT:    [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3_16:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3_16:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3_16]], ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT:    call void @_Z3barv()
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_16:%.*]]
+; CHECK-UNROLL:       for.cond.cleanup6.16:
+; CHECK-UNROLL-NEXT:    unreachable
 ; CHECK-UNROLL:       for.body7:
-; CHECK-UNROLL-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-UNROLL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-UNROLL-NEXT:    [[TMP192:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP194:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP194]], [[TMP193]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_1:%.*]]
 ; CHECK-UNROLL:       for.body7.1:
-; CHECK-UNROLL-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-; CHECK-UNROLL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP195:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP195]], i64 1
+; CHECK-UNROLL-NEXT:    [[TMP196:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
-; CHECK-UNROLL-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-UNROLL-NEXT:    [[TMP197:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP197]], [[TMP196]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_2:%.*]]
 ; CHECK-UNROLL:       for.body7.2:
-; CHECK-UNROLL-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
-; CHECK-UNROLL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP198:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP198]], i64 2
+; CHECK-UNROLL-NEXT:    [[TMP199:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
-; CHECK-UNROLL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-UNROLL-NEXT:    [[TMP200:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP200]], [[TMP199]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    br label [[FOR_BODY7_3:%.*]]
 ; CHECK-UNROLL:       for.body7.3:
-; CHECK-UNROLL-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
-; CHECK-UNROLL-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT:    [[TMP201:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP201]], i64 3
+; CHECK-UNROLL-NEXT:    [[TMP202:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
-; CHECK-UNROLL-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-UNROLL-NEXT:    [[TMP203:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP203]], [[TMP202]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
-; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6]]
+; CHECK-UNROLL-NEXT:    br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6:%.*]]
 ; CHECK-UNROLL:       for.body7.4:
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARRAYIDX]], [[FOR_BODY7_3]] ]
-; CHECK-UNROLL-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
-; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 4
-; CHECK-UNROLL-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARR]], [[FOR_BODY7_3]] ], [ [[ARRAYIDX_1]], [[FOR_BODY7_3_1]] ], [ [[ARRAYIDX_2]], [[FOR_BODY7_3_2]] ], [ [[ARRAYIDX_3]], [[FOR_BODY7_3_3]] ], [ [[ARRAYIDX_4]], [[FOR_BODY7_3_4]] ], [ [[ARRAYIDX_5]], [[FOR_BODY7_3_5]] ], [ [[ARRAYIDX_6]], [[FOR_BODY7_3_6]] ], [ [[ARRAYIDX_7]], [[FOR_BODY7_3_7]] ], [ [[ARRAYIDX_8]], [[FOR_BODY7_3_8]] ], [ [[ARRAYIDX_9]], [[FOR_BODY7_3_9]] ], [ [[ARRAYIDX_10]], [[FOR_BODY7_3_10]] ], [ [[ARRAYIDX_11]], [[FOR_BODY7_3_11]] ], [ [[ARRAYIDX_12]], [[FOR_BODY7_3_12]] ], [ [[ARRAYIDX_13]], [[FOR_BODY7_3_13]] ], [ [[ARRAYIDX_14]], [[FOR_BODY7_3_14]] ], [ [[ARRAYIDX_15]], [[FOR_BODY7_3_15]] ], [ [[ARRAYIDX_16]], [[FOR_BODY7_3_16]] ]
+; CHECK-UNROLL-NEXT:    [[TMP204:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT:    [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP204]], i64 4
+; CHECK-UNROLL-NEXT:    [[TMP205:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
 ; CHECK-UNROLL-NEXT:    [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
-; CHECK-UNROLL-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
-; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-UNROLL-NEXT:    [[TMP206:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP206]], [[TMP205]]
 ; CHECK-UNROLL-NEXT:    store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
 ; CHECK-UNROLL-NEXT:    call void @_Z3barv()
 ; CHECK-UNROLL-NEXT:    unreachable
@@ -192,7 +937,3 @@ cleanup:                                          ; preds = %if.then, %for.cond.
 ; CHECK-CFG: [[META2]] = !{!"llvm.loop.unroll.enable"}
 ; CHECK-CFG: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
 ;.
-; CHECK-UNROLL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-UNROLL: [[META1]] = !{!"llvm.loop.mustprogress"}
-; CHECK-UNROLL: [[META2]] = !{!"llvm.loop.unroll.enable"}
-;.



More information about the llvm-commits mailing list