[llvm] Users/xiangzhangllvm/loop unroll folding branches (PR #74703)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 15:38:56 PST 2023
================
@@ -0,0 +1,939 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s --check-prefixes=CHECK-CFG
+; RUN: opt < %s -S -passes=simplifycfg,loop-unroll --unroll-max-upperbound=17 | FileCheck %s --check-prefixes=CHECK-UNROLL
+
+; This test designed to check:
+; We can still unroll loop with 'pragma unroll' if loop count(trip count) was destroyed by previous optimization.
+; For exmaple, in following test, loop condition "Dim < 16" was 'merged' with "Dim == Dims" in folding branches
+; at simplifycfg. But if custumer mark the loop with "#pragma unroll", we can still successfully unroll it under
+; unroll-max-upperbound.
+;
+; __device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
+; #pragma unroll
+; for (int Dim = 0; Dim < 16; ++Dim) {
+; if (Dim == Dims) {
+; break;
+; }
+; int divmod = Arr[Dim][Idx];
+; Idx = divmod + 1;
+;
+; for (int arg = 0; arg < 4; arg++) {
+; Out[arg] += Arr[Dim][arg];
+; bar();
+; }
+; }
+; }
+
+define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr noundef %Out) {
+; CHECK-CFG-LABEL: define void @func(
+; CHECK-CFG-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-CFG-NEXT: entry:
+; CHECK-CFG-NEXT: br label [[FOR_COND:%.*]]
+; CHECK-CFG: for.cond:
+; CHECK-CFG-NEXT: [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
+; CHECK-CFG-NEXT: [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
+; CHECK-CFG-NEXT: [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT: [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
+; CHECK-CFG-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-CFG-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-CFG: if.end:
+; CHECK-CFG-NEXT: [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
+; CHECK-CFG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
+; CHECK-CFG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT: [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
+; CHECK-CFG-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
+; CHECK-CFG-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-CFG-NEXT: [[ADD]] = add nsw i32 [[TMP1]], 1
+; CHECK-CFG-NEXT: br label [[FOR_COND4:%.*]]
+; CHECK-CFG: for.cond4:
+; CHECK-CFG-NEXT: [[ARG_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY7:%.*]] ]
+; CHECK-CFG-NEXT: [[CMP5:%.*]] = icmp slt i32 [[ARG_0]], 4
+; CHECK-CFG-NEXT: br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6]]
+; CHECK-CFG: for.cond.cleanup6:
+; CHECK-CFG-NEXT: [[INC16]] = add nsw i32 [[DIM_0]], 1
+; CHECK-CFG-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-CFG: for.body7:
+; CHECK-CFG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-CFG-NEXT: [[IDXPROM10:%.*]] = sext i32 [[ARG_0]] to i64
+; CHECK-CFG-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
+; CHECK-CFG-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM10]]
+; CHECK-CFG-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-CFG-NEXT: store i32 [[ADD14]], ptr [[ARRAYIDX13]], align 4
+; CHECK-CFG-NEXT: call void @_Z3barv()
+; CHECK-CFG-NEXT: [[INC]] = add nsw i32 [[ARG_0]], 1
+; CHECK-CFG-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-CFG: cleanup:
+; CHECK-CFG-NEXT: ret void
+;
+; CHECK-UNROLL-LABEL: define void @func(
+; CHECK-UNROLL-SAME: i32 noundef [[IDX:%.*]], ptr noundef [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr noundef [[OUT:%.*]]) {
+; CHECK-UNROLL-NEXT: entry:
+; CHECK-UNROLL-NEXT: br label [[FOR_COND:%.*]]
+; CHECK-UNROLL: for.cond:
+; CHECK-UNROLL-NEXT: [[CMP1:%.*]] = icmp eq i32 0, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL: if.end:
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4:%.*]]
+; CHECK-UNROLL: for.cond4:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6:
+; CHECK-UNROLL-NEXT: [[CMP1_1:%.*]] = icmp eq i32 1, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+; CHECK-UNROLL: if.end.1:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_1:%.*]]
+; CHECK-UNROLL: for.cond4.1:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_12:%.*]]
+; CHECK-UNROLL: for.body7.12:
+; CHECK-UNROLL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-UNROLL-NEXT: [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_11:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_11]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_1:%.*]]
+; CHECK-UNROLL: for.body7.1.1:
+; CHECK-UNROLL-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_1:%.*]]
+; CHECK-UNROLL: for.body7.2.1:
+; CHECK-UNROLL-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_1:%.*]]
+; CHECK-UNROLL: for.body7.3.1:
+; CHECK-UNROLL-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_1:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6_1:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.1:
+; CHECK-UNROLL-NEXT: [[CMP1_2:%.*]] = icmp eq i32 2, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+; CHECK-UNROLL: if.end.2:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_2:%.*]]
+; CHECK-UNROLL: for.cond4.2:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_24:%.*]]
+; CHECK-UNROLL: for.body7.24:
+; CHECK-UNROLL-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-UNROLL-NEXT: [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_23:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_23]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_2:%.*]]
+; CHECK-UNROLL: for.body7.1.2:
+; CHECK-UNROLL-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_2:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_2:%.*]]
+; CHECK-UNROLL: for.body7.2.2:
+; CHECK-UNROLL-NEXT: [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_2:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_2:%.*]]
+; CHECK-UNROLL: for.body7.3.2:
+; CHECK-UNROLL-NEXT: [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_2:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_2:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.2:
+; CHECK-UNROLL-NEXT: [[CMP1_3:%.*]] = icmp eq i32 3, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+; CHECK-UNROLL: if.end.3:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_3:%.*]]
+; CHECK-UNROLL: for.cond4.3:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_36:%.*]]
+; CHECK-UNROLL: for.body7.36:
+; CHECK-UNROLL-NEXT: [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+; CHECK-UNROLL-NEXT: [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_35:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_35]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_3:%.*]]
+; CHECK-UNROLL: for.body7.1.3:
+; CHECK-UNROLL-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_3:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_3:%.*]]
+; CHECK-UNROLL: for.body7.2.3:
+; CHECK-UNROLL-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_3:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_3:%.*]]
+; CHECK-UNROLL: for.body7.3.3:
+; CHECK-UNROLL-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_3:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_3:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.3:
+; CHECK-UNROLL-NEXT: [[CMP1_4:%.*]] = icmp eq i32 4, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+; CHECK-UNROLL: if.end.4:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_4:%.*]]
+; CHECK-UNROLL: for.cond4.4:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_48:%.*]]
+; CHECK-UNROLL: for.body7.48:
+; CHECK-UNROLL-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+; CHECK-UNROLL-NEXT: [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_47:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_47]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_4:%.*]]
+; CHECK-UNROLL: for.body7.1.4:
+; CHECK-UNROLL-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_4:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_4:%.*]]
+; CHECK-UNROLL: for.body7.2.4:
+; CHECK-UNROLL-NEXT: [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_4:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_4:%.*]]
+; CHECK-UNROLL: for.body7.3.4:
+; CHECK-UNROLL-NEXT: [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_4:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_4:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.4:
+; CHECK-UNROLL-NEXT: [[CMP1_5:%.*]] = icmp eq i32 5, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+; CHECK-UNROLL: if.end.5:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_5:%.*]]
+; CHECK-UNROLL: for.cond4.5:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_5:%.*]]
+; CHECK-UNROLL: for.body7.5:
+; CHECK-UNROLL-NEXT: [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+; CHECK-UNROLL-NEXT: [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_5:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_5]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_5:%.*]]
+; CHECK-UNROLL: for.body7.1.5:
+; CHECK-UNROLL-NEXT: [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_5:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_5:%.*]]
+; CHECK-UNROLL: for.body7.2.5:
+; CHECK-UNROLL-NEXT: [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_5:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_5:%.*]]
+; CHECK-UNROLL: for.body7.3.5:
+; CHECK-UNROLL-NEXT: [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_5:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_5:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.5:
+; CHECK-UNROLL-NEXT: [[CMP1_6:%.*]] = icmp eq i32 6, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+; CHECK-UNROLL: if.end.6:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_6:%.*]]
+; CHECK-UNROLL: for.cond4.6:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_6:%.*]]
+; CHECK-UNROLL: for.body7.6:
+; CHECK-UNROLL-NEXT: [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4
+; CHECK-UNROLL-NEXT: [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_6:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_6]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_6:%.*]]
+; CHECK-UNROLL: for.body7.1.6:
+; CHECK-UNROLL-NEXT: [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_6:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_6:%.*]]
+; CHECK-UNROLL: for.body7.2.6:
+; CHECK-UNROLL-NEXT: [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_6:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_6:%.*]]
+; CHECK-UNROLL: for.body7.3.6:
+; CHECK-UNROLL-NEXT: [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_6:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_6:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.6:
+; CHECK-UNROLL-NEXT: [[CMP1_7:%.*]] = icmp eq i32 7, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+; CHECK-UNROLL: if.end.7:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_7:%.*]]
+; CHECK-UNROLL: for.cond4.7:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_7:%.*]]
+; CHECK-UNROLL: for.body7.7:
+; CHECK-UNROLL-NEXT: [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4
+; CHECK-UNROLL-NEXT: [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_7:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_7]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_7:%.*]]
+; CHECK-UNROLL: for.body7.1.7:
+; CHECK-UNROLL-NEXT: [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_7:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_7:%.*]]
+; CHECK-UNROLL: for.body7.2.7:
+; CHECK-UNROLL-NEXT: [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_7:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_7:%.*]]
+; CHECK-UNROLL: for.body7.3.7:
+; CHECK-UNROLL-NEXT: [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_7:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_7:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.7:
+; CHECK-UNROLL-NEXT: [[CMP1_8:%.*]] = icmp eq i32 8, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+; CHECK-UNROLL: if.end.8:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_8:%.*]]
+; CHECK-UNROLL: for.cond4.8:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_8:%.*]]
+; CHECK-UNROLL: for.body7.8:
+; CHECK-UNROLL-NEXT: [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
+; CHECK-UNROLL-NEXT: [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_8:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_8]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_8:%.*]]
+; CHECK-UNROLL: for.body7.1.8:
+; CHECK-UNROLL-NEXT: [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_8:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_8:%.*]]
+; CHECK-UNROLL: for.body7.2.8:
+; CHECK-UNROLL-NEXT: [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_8:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_8:%.*]]
+; CHECK-UNROLL: for.body7.3.8:
+; CHECK-UNROLL-NEXT: [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_8:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_8:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.8:
+; CHECK-UNROLL-NEXT: [[CMP1_9:%.*]] = icmp eq i32 9, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+; CHECK-UNROLL: if.end.9:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_9:%.*]]
+; CHECK-UNROLL: for.cond4.9:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_9:%.*]]
+; CHECK-UNROLL: for.body7.9:
+; CHECK-UNROLL-NEXT: [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4
+; CHECK-UNROLL-NEXT: [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_9:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_9]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_9:%.*]]
+; CHECK-UNROLL: for.body7.1.9:
+; CHECK-UNROLL-NEXT: [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_9:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_9:%.*]]
+; CHECK-UNROLL: for.body7.2.9:
+; CHECK-UNROLL-NEXT: [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_9:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_9:%.*]]
+; CHECK-UNROLL: for.body7.3.9:
+; CHECK-UNROLL-NEXT: [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_9:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_9:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.9:
+; CHECK-UNROLL-NEXT: [[CMP1_10:%.*]] = icmp eq i32 10, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+; CHECK-UNROLL: if.end.10:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_10:%.*]]
+; CHECK-UNROLL: for.cond4.10:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_10:%.*]]
+; CHECK-UNROLL: for.body7.10:
+; CHECK-UNROLL-NEXT: [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4
+; CHECK-UNROLL-NEXT: [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_10:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_10]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_10:%.*]]
+; CHECK-UNROLL: for.body7.1.10:
+; CHECK-UNROLL-NEXT: [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_10:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_10:%.*]]
+; CHECK-UNROLL: for.body7.2.10:
+; CHECK-UNROLL-NEXT: [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_10:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_10:%.*]]
+; CHECK-UNROLL: for.body7.3.10:
+; CHECK-UNROLL-NEXT: [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_10:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_10:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.10:
+; CHECK-UNROLL-NEXT: [[CMP1_11:%.*]] = icmp eq i32 11, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+; CHECK-UNROLL: if.end.11:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_11:%.*]]
+; CHECK-UNROLL: for.cond4.11:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_11:%.*]]
+; CHECK-UNROLL: for.body7.11:
+; CHECK-UNROLL-NEXT: [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4
+; CHECK-UNROLL-NEXT: [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_119:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_119]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_11:%.*]]
+; CHECK-UNROLL: for.body7.1.11:
+; CHECK-UNROLL-NEXT: [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_11:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_11:%.*]]
+; CHECK-UNROLL: for.body7.2.11:
+; CHECK-UNROLL-NEXT: [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_11:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_11:%.*]]
+; CHECK-UNROLL: for.body7.3.11:
+; CHECK-UNROLL-NEXT: [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_11:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_11:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.11:
+; CHECK-UNROLL-NEXT: [[CMP1_12:%.*]] = icmp eq i32 12, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+; CHECK-UNROLL: if.end.12:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_12:%.*]]
+; CHECK-UNROLL: for.cond4.12:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1210:%.*]]
+; CHECK-UNROLL: for.body7.1210:
+; CHECK-UNROLL-NEXT: [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4
+; CHECK-UNROLL-NEXT: [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_12:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_12]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_12:%.*]]
+; CHECK-UNROLL: for.body7.1.12:
+; CHECK-UNROLL-NEXT: [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_12:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_12:%.*]]
+; CHECK-UNROLL: for.body7.2.12:
+; CHECK-UNROLL-NEXT: [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_12:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_12:%.*]]
+; CHECK-UNROLL: for.body7.3.12:
+; CHECK-UNROLL-NEXT: [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_12:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_12:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.12:
+; CHECK-UNROLL-NEXT: [[CMP1_13:%.*]] = icmp eq i32 13, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+; CHECK-UNROLL: if.end.13:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_13:%.*]]
+; CHECK-UNROLL: for.cond4.13:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_13:%.*]]
+; CHECK-UNROLL: for.body7.13:
+; CHECK-UNROLL-NEXT: [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4
+; CHECK-UNROLL-NEXT: [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_13:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_13]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_13:%.*]]
+; CHECK-UNROLL: for.body7.1.13:
+; CHECK-UNROLL-NEXT: [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_13:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_13:%.*]]
+; CHECK-UNROLL: for.body7.2.13:
+; CHECK-UNROLL-NEXT: [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_13:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_13:%.*]]
+; CHECK-UNROLL: for.body7.3.13:
+; CHECK-UNROLL-NEXT: [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_13:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_13:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.13:
+; CHECK-UNROLL-NEXT: [[CMP1_14:%.*]] = icmp eq i32 14, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+; CHECK-UNROLL: if.end.14:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_14:%.*]]
+; CHECK-UNROLL: for.cond4.14:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_14:%.*]]
+; CHECK-UNROLL: for.body7.14:
+; CHECK-UNROLL-NEXT: [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4
+; CHECK-UNROLL-NEXT: [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_14:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_14:%.*]]
+; CHECK-UNROLL: for.body7.1.14:
+; CHECK-UNROLL-NEXT: [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_14:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_14:%.*]]
+; CHECK-UNROLL: for.body7.2.14:
+; CHECK-UNROLL-NEXT: [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_14:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_14:%.*]]
+; CHECK-UNROLL: for.body7.3.14:
+; CHECK-UNROLL-NEXT: [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_14:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_14:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.14:
+; CHECK-UNROLL-NEXT: [[CMP1_15:%.*]] = icmp eq i32 15, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+; CHECK-UNROLL: if.end.15:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_15:%.*]]
+; CHECK-UNROLL: for.cond4.15:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_15:%.*]]
+; CHECK-UNROLL: for.body7.15:
+; CHECK-UNROLL-NEXT: [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4
+; CHECK-UNROLL-NEXT: [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_15:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_15]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_15:%.*]]
+; CHECK-UNROLL: for.body7.1.15:
+; CHECK-UNROLL-NEXT: [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_15:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_15:%.*]]
+; CHECK-UNROLL: for.body7.2.15:
+; CHECK-UNROLL-NEXT: [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_15:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_15:%.*]]
+; CHECK-UNROLL: for.body7.3.15:
+; CHECK-UNROLL-NEXT: [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_15:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_15:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.15:
+; CHECK-UNROLL-NEXT: br i1 true, label [[CLEANUP]], label [[IF_END_16:%.*]]
+; CHECK-UNROLL: if.end.16:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 16
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_16:%.*]]
+; CHECK-UNROLL: for.cond4.16:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_16:%.*]]
+; CHECK-UNROLL: for.body7.16:
+; CHECK-UNROLL-NEXT: [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4
+; CHECK-UNROLL-NEXT: [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_16:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_16]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_16:%.*]]
+; CHECK-UNROLL: for.body7.1.16:
+; CHECK-UNROLL-NEXT: [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_16:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_16:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_16]], ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_16:%.*]]
+; CHECK-UNROLL: for.body7.2.16:
+; CHECK-UNROLL-NEXT: [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_16:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_16:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_16]], ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_16:%.*]]
+; CHECK-UNROLL: for.body7.3.16:
+; CHECK-UNROLL-NEXT: [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_16:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_16:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_16]], ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_16:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.16:
+; CHECK-UNROLL-NEXT: unreachable
+; CHECK-UNROLL: for.body7:
+; CHECK-UNROLL-NEXT: [[TMP192:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4
+; CHECK-UNROLL-NEXT: [[TMP194:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP194]], [[TMP193]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1:%.*]]
+; CHECK-UNROLL: for.body7.1:
+; CHECK-UNROLL-NEXT: [[TMP195:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP195]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP196:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP197:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[TMP197]], [[TMP196]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2:%.*]]
+; CHECK-UNROLL: for.body7.2:
+; CHECK-UNROLL-NEXT: [[TMP198:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP198]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP199:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP200:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[TMP200]], [[TMP199]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3:%.*]]
+; CHECK-UNROLL: for.body7.3:
+; CHECK-UNROLL-NEXT: [[TMP201:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP201]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP202:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP203:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[TMP203]], [[TMP202]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6:%.*]]
+; CHECK-UNROLL: for.body7.4:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARR]], [[FOR_BODY7_3]] ], [ [[ARRAYIDX_1]], [[FOR_BODY7_3_1]] ], [ [[ARRAYIDX_2]], [[FOR_BODY7_3_2]] ], [ [[ARRAYIDX_3]], [[FOR_BODY7_3_3]] ], [ [[ARRAYIDX_4]], [[FOR_BODY7_3_4]] ], [ [[ARRAYIDX_5]], [[FOR_BODY7_3_5]] ], [ [[ARRAYIDX_6]], [[FOR_BODY7_3_6]] ], [ [[ARRAYIDX_7]], [[FOR_BODY7_3_7]] ], [ [[ARRAYIDX_8]], [[FOR_BODY7_3_8]] ], [ [[ARRAYIDX_9]], [[FOR_BODY7_3_9]] ], [ [[ARRAYIDX_10]], [[FOR_BODY7_3_10]] ], [ [[ARRAYIDX_11]], [[FOR_BODY7_3_11]] ], [ [[ARRAYIDX_12]], [[FOR_BODY7_3_12]] ], [ [[ARRAYIDX_13]], [[FOR_BODY7_3_13]] ], [ [[ARRAYIDX_14]], [[FOR_BODY7_3_14]] ], [ [[ARRAYIDX_15]], [[FOR_BODY7_3_15]] ], [ [[ARRAYIDX_16]], [[FOR_BODY7_3_16]] ]
+; CHECK-UNROLL-NEXT: [[TMP204:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP204]], i64 4
+; CHECK-UNROLL-NEXT: [[TMP205:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
+; CHECK-UNROLL-NEXT: [[TMP206:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_4:%.*]] = add nsw i32 [[TMP206]], [[TMP205]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: unreachable
+; CHECK-UNROLL: cleanup:
+; CHECK-UNROLL-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.cleanup6, %entry
+ %Dim.0 = phi i32 [ 0, %entry ], [ %inc16, %for.cond.cleanup6 ]
+ %Idx.addr.0 = phi i32 [ %Idx, %entry ], [ %add, %for.cond.cleanup6 ]
+ %cmp = icmp slt i32 %Dim.0, 16
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond
+ br label %cleanup
+
+for.body: ; preds = %for.cond
+ %cmp1 = icmp eq i32 %Dim.0, %Dims
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ br label %cleanup
+
+if.end: ; preds = %for.body
+ %idxprom = sext i32 %Dim.0 to i64
+ %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %idxprom
+ %0 = load ptr, ptr %arrayidx, align 8
+ %idxprom2 = sext i32 %Idx.addr.0 to i64
+ %arrayidx3 = getelementptr inbounds i32, ptr %0, i64 %idxprom2
+ %1 = load i32, ptr %arrayidx3, align 4
+ %add = add nsw i32 %1, 1
+ br label %for.cond4
+
+for.cond4: ; preds = %for.body7, %if.end
+ %arg.0 = phi i32 [ 0, %if.end ], [ %inc, %for.body7 ]
+ %cmp5 = icmp slt i32 %arg.0, 4
+ br i1 %cmp5, label %for.body7, label %for.cond.cleanup6
+
+for.cond.cleanup6: ; preds = %for.cond4
+ %inc16 = add nsw i32 %Dim.0, 1
+ br label %for.cond, !llvm.loop !0
+
+for.body7: ; preds = %for.cond4
+ %2 = load ptr, ptr %arrayidx, align 8
+ %idxprom10 = sext i32 %arg.0 to i64
+ %arrayidx11 = getelementptr inbounds i32, ptr %2, i64 %idxprom10
+ %3 = load i32, ptr %arrayidx11, align 4
+ %arrayidx13 = getelementptr inbounds i32, ptr %Out, i64 %idxprom10
+ %4 = load i32, ptr %arrayidx13, align 4
+ %add14 = add nsw i32 %4, %3
+ store i32 %add14, ptr %arrayidx13, align 4
+ call void @_Z3barv()
+ %inc = add nsw i32 %arg.0, 1
+ br label %for.cond4, !llvm.loop !3
+
+cleanup: ; preds = %if.then, %for.cond.cleanup
+ ret void
+}
+
+ declare void @_Z3barv()
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.enable"}
+!3 = distinct !{!3, !1}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.enable"}
----------------
xiangzh1 wrote:
Yes! Sorry, I miss remove it.
https://github.com/llvm/llvm-project/pull/74703
More information about the llvm-commits
mailing list