[clang] [llvm] [SimplifyCFG] Not folding branch in loop header with constant iterations (PR #74268)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 00:34:33 PST 2023


https://github.com/xiangzh1 updated https://github.com/llvm/llvm-project/pull/74268

>From 21fef821eb83d0d426a5e2de20469dcdd41598f1 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Mon, 4 Dec 2023 16:25:37 +0800
Subject: [PATCH 1/2] [SimplifyCFG] Pre-commit test for folding branches in
 simplify cfg

---
 clang/test/CodeGenCUDA/simplify-cfg-unroll.cu | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 clang/test/CodeGenCUDA/simplify-cfg-unroll.cu

diff --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
new file mode 100644
index 0000000000000..b6502ce76c298
--- /dev/null
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -O2 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
+// RUN:    -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void bar();
+
+// CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
+// CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP127:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT:    br i1 [[CMP127]], label [[CLEANUP:%.*]], label [[IF_END_PREHEADER:%.*]]
+// CHECK:       if.end.preheader:
+// CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[DIMS]], -1
+// CHECK-NEXT:    [[UMIN:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 15)
+// CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[UMIN]], 1
+// CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT:    br label [[IF_END:%.*]]
+// CHECK:       if.end:
+// CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[IF_END_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[INDVARS_IV]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+// CHECK-NEXT:    br i1 [[EXITCOND]], label [[CLEANUP]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK:       cleanup:
+// CHECK-NEXT:    ret void
+//
+__device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
+  #pragma unroll
+  for (int Dim = 0; Dim < 16; ++Dim) {
+    if (Dim == Dims) {
+      break;
+    }
+    int divmod = Arr[Dim][Idx];
+    Idx = divmod + 1;
+
+    for (int arg = 0; arg < 4; arg++) {
+      Out[arg] += Arr[Dim][arg];
+      bar();
+    }
+  }
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+// CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META11]] = !{!"llvm.loop.unroll.enable"}
+//.

>From e9632236b80710416dd1785cc7665b6e30dd28a5 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Mon, 4 Dec 2023 16:26:14 +0800
Subject: [PATCH 2/2] [SimplifyCFG] Not folding branch in constant loops which
 expected unroll

A loop with a constant iteration count and an unroll hint is usually
expected by its users to be unrolled, so folding branches in the header
of such a loop in SimplifyCFG breaks the unroll optimization.
---
 clang/test/CodeGenCUDA/simplify-cfg-unroll.cu | 536 ++++++++++++++++--
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  57 ++
 2 files changed, 551 insertions(+), 42 deletions(-)

diff --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
index b6502ce76c298..20506bc24806a 100644
--- a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -11,50 +11,505 @@ __device__ void bar();
 // CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
 // CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CMP127:%.*]] = icmp eq i32 [[DIMS]], 0
-// CHECK-NEXT:    br i1 [[CMP127]], label [[CLEANUP:%.*]], label [[IF_END_PREHEADER:%.*]]
-// CHECK:       if.end.preheader:
-// CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[DIMS]], -1
-// CHECK-NEXT:    [[UMIN:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 15)
-// CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[UMIN]], 1
-// CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
-// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
-// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
-// CHECK-NEXT:    br label [[IF_END:%.*]]
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
 // CHECK:       if.end:
-// CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[IF_END_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[INDVARS_IV]]
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
 // CHECK-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3:[0-9]+]]
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
 // CHECK-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
-// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
 // CHECK-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
-// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
 // CHECK-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-// CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-// CHECK-NEXT:    br i1 [[EXITCOND]], label [[CLEANUP]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_1:%.*]] = icmp eq i32 [[DIMS]], 1
+// CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+// CHECK:       if.end.1:
+// CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+// CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_129:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK-NEXT:    store i32 [[ADD14_129]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_1:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// CHECK-NEXT:    store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_1:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// CHECK-NEXT:    store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_1:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+// CHECK-NEXT:    store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_2:%.*]] = icmp eq i32 [[DIMS]], 2
+// CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+// CHECK:       if.end.2:
+// CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+// CHECK-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_230:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+// CHECK-NEXT:    store i32 [[ADD14_230]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+// CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_2:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+// CHECK-NEXT:    store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+// CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_2:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+// CHECK-NEXT:    store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+// CHECK-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_2:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+// CHECK-NEXT:    store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_3:%.*]] = icmp eq i32 [[DIMS]], 3
+// CHECK-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+// CHECK:       if.end.3:
+// CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+// CHECK-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_331:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+// CHECK-NEXT:    store i32 [[ADD14_331]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+// CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_3:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+// CHECK-NEXT:    store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+// CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_3:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+// CHECK-NEXT:    store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+// CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_3:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+// CHECK-NEXT:    store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_4:%.*]] = icmp eq i32 [[DIMS]], 4
+// CHECK-NEXT:    br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+// CHECK:       if.end.4:
+// CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+// CHECK-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+// CHECK-NEXT:    store i32 [[ADD14_4]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+// CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_4:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+// CHECK-NEXT:    store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+// CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_4:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+// CHECK-NEXT:    store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+// CHECK-NEXT:    [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_4:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+// CHECK-NEXT:    store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_5:%.*]] = icmp eq i32 [[DIMS]], 5
+// CHECK-NEXT:    br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+// CHECK:       if.end.5:
+// CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+// CHECK-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_5:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+// CHECK-NEXT:    store i32 [[ADD14_5]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+// CHECK-NEXT:    [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_5:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+// CHECK-NEXT:    store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+// CHECK-NEXT:    [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_5:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+// CHECK-NEXT:    store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+// CHECK-NEXT:    [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_5:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+// CHECK-NEXT:    store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_6:%.*]] = icmp eq i32 [[DIMS]], 6
+// CHECK-NEXT:    br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+// CHECK:       if.end.6:
+// CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+// CHECK-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_6:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+// CHECK-NEXT:    store i32 [[ADD14_6]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+// CHECK-NEXT:    [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_6:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+// CHECK-NEXT:    store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+// CHECK-NEXT:    [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_6:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+// CHECK-NEXT:    store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+// CHECK-NEXT:    [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_6:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+// CHECK-NEXT:    store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_7:%.*]] = icmp eq i32 [[DIMS]], 7
+// CHECK-NEXT:    br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+// CHECK:       if.end.7:
+// CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+// CHECK-NEXT:    [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_7:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+// CHECK-NEXT:    store i32 [[ADD14_7]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+// CHECK-NEXT:    [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_7:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+// CHECK-NEXT:    store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+// CHECK-NEXT:    [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_7:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+// CHECK-NEXT:    store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+// CHECK-NEXT:    [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_7:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+// CHECK-NEXT:    store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_8:%.*]] = icmp eq i32 [[DIMS]], 8
+// CHECK-NEXT:    br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+// CHECK:       if.end.8:
+// CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+// CHECK-NEXT:    [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_8:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+// CHECK-NEXT:    store i32 [[ADD14_8]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+// CHECK-NEXT:    [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_8:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+// CHECK-NEXT:    store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+// CHECK-NEXT:    [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_8:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+// CHECK-NEXT:    store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+// CHECK-NEXT:    [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_8:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+// CHECK-NEXT:    store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_9:%.*]] = icmp eq i32 [[DIMS]], 9
+// CHECK-NEXT:    br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+// CHECK:       if.end.9:
+// CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+// CHECK-NEXT:    [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_9:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+// CHECK-NEXT:    store i32 [[ADD14_9]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+// CHECK-NEXT:    [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_9:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+// CHECK-NEXT:    store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+// CHECK-NEXT:    [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_9:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+// CHECK-NEXT:    store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+// CHECK-NEXT:    [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_9:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+// CHECK-NEXT:    store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_10:%.*]] = icmp eq i32 [[DIMS]], 10
+// CHECK-NEXT:    br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+// CHECK:       if.end.10:
+// CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+// CHECK-NEXT:    [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_10:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+// CHECK-NEXT:    store i32 [[ADD14_10]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+// CHECK-NEXT:    [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_10:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+// CHECK-NEXT:    store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+// CHECK-NEXT:    [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_10:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+// CHECK-NEXT:    store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+// CHECK-NEXT:    [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_10:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+// CHECK-NEXT:    store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_11:%.*]] = icmp eq i32 [[DIMS]], 11
+// CHECK-NEXT:    br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+// CHECK:       if.end.11:
+// CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+// CHECK-NEXT:    [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_11:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+// CHECK-NEXT:    store i32 [[ADD14_11]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+// CHECK-NEXT:    [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_11:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+// CHECK-NEXT:    store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+// CHECK-NEXT:    [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_11:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+// CHECK-NEXT:    store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+// CHECK-NEXT:    [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_11:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+// CHECK-NEXT:    store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_12:%.*]] = icmp eq i32 [[DIMS]], 12
+// CHECK-NEXT:    br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+// CHECK:       if.end.12:
+// CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+// CHECK-NEXT:    [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_12:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+// CHECK-NEXT:    store i32 [[ADD14_12]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+// CHECK-NEXT:    [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_12:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+// CHECK-NEXT:    store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+// CHECK-NEXT:    [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_12:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+// CHECK-NEXT:    store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+// CHECK-NEXT:    [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_12:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+// CHECK-NEXT:    store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_13:%.*]] = icmp eq i32 [[DIMS]], 13
+// CHECK-NEXT:    br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+// CHECK:       if.end.13:
+// CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+// CHECK-NEXT:    [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_13:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+// CHECK-NEXT:    store i32 [[ADD14_13]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+// CHECK-NEXT:    [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_13:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+// CHECK-NEXT:    store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+// CHECK-NEXT:    [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_13:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+// CHECK-NEXT:    store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+// CHECK-NEXT:    [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_13:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+// CHECK-NEXT:    store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_14:%.*]] = icmp eq i32 [[DIMS]], 14
+// CHECK-NEXT:    br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+// CHECK:       if.end.14:
+// CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+// CHECK-NEXT:    [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_14:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+// CHECK-NEXT:    store i32 [[ADD14_14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+// CHECK-NEXT:    [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_14:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+// CHECK-NEXT:    store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+// CHECK-NEXT:    [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_14:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+// CHECK-NEXT:    store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+// CHECK-NEXT:    [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_14:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+// CHECK-NEXT:    store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_15:%.*]] = icmp eq i32 [[DIMS]], 15
+// CHECK-NEXT:    br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+// CHECK:       if.end.15:
+// CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+// CHECK-NEXT:    [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_15:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+// CHECK-NEXT:    store i32 [[ADD14_15]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+// CHECK-NEXT:    [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_15:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+// CHECK-NEXT:    store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+// CHECK-NEXT:    [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_15:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+// CHECK-NEXT:    store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+// CHECK-NEXT:    [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_15:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+// CHECK-NEXT:    store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    br label [[CLEANUP]]
 // CHECK:       cleanup:
 // CHECK-NEXT:    ret void
 //
@@ -80,7 +535,4 @@ __device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
 // CHECK: [[META6]] = !{!"Simple C++ TBAA"}
 // CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
 // CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
-// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
-// CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
-// CHECK: [[META11]] = !{!"llvm.loop.unroll.enable"}
 //.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c09cf9c2325c4..ca5a55d9cd17c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -3634,6 +3635,59 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
   }
 }
 
+/// Return true if the loop metadata (MD_loop) attached to \p TI contains an
+/// "llvm.loop.unroll.enable", ".unroll.full" or ".unroll.count" hint.
+static bool hasUnrollHint(Instruction *TI) {
+  if (MDNode *MD = TI->getMetadata(LLVMContext::MD_loop))
+    return GetUnrollMetadata(MD, "llvm.loop.unroll.enable") ||
+           GetUnrollMetadata(MD, "llvm.loop.unroll.full") ||
+           GetUnrollMetadata(MD, "llvm.loop.unroll.count");
+  return false;
+}
+
+/// Return true when \p PBI looks like the exit test of a loop with a constant
+/// trip count that has been marked for unrolling, for example:
+///   #pragma unroll
+///   for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
+///     if (Cond2)
+///       break;
+///     ... loop body ...
+///   }
+/// Folding "I < ConstNum" together with "Cond2" into one branch may break or
+/// pessimize the requested unrolling, so such branches should not be folded.
+///
+/// This is a heuristic: it only checks that the compare has a constant
+/// operand greater than 1 and that a block at or after PBI's block in layout
+/// order branches back to it with an unroll hint on its terminator.
+static bool isConstantLoopWithUnrollHint(BranchInst *PBI) {
+  auto *ICmp = dyn_cast<ICmpInst>(PBI->getCondition());
+  if (!ICmp)
+    return false;
+
+  // Require a constant operand greater than 1, i.e. more than one iteration.
+  // Compare on the APInt so constants wider than 64 bits cannot trip the
+  // assertion in ConstantInt::getSExtValue().
+  bool HasConstBound = any_of(ICmp->operands(), [](const Use &U) {
+    auto *C = dyn_cast<ConstantInt>(U.get());
+    return C && C->getValue().sgt(1);
+  });
+  if (!HasConstBound)
+    return false;
+
+  // Loop information is not available here, so approximate a backedge as a
+  // predecessor of PBB that is laid out at or after PBB in the function, and
+  // look for the unroll hint on that predecessor's terminator.
+  BasicBlock *PBB = PBI->getParent();
+  for (Function::iterator I = PBB->getIterator(), E = PBB->getParent()->end();
+       I != E; ++I) {
+    BasicBlock *BB = &*I;
+    if (is_contained(predecessors(PBB), BB) &&
+        hasUnrollHint(BB->getTerminator()))
+      return true;
+  }
+  return false;
+}
+
 /// Determine if the two branches share a common destination and deduce a glue
 /// that joins the branches' conditions to arrive at the common destination if
 /// that would be profitable.
@@ -3645,6 +3699,9 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
   assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
          "PredBB must be a predecessor of BB.");
 
+  if (isConstantLoopWithUnrollHint(PBI))
+    return std::nullopt;
+
   // We have the potential to fold the conditions together, but if the
   // predecessor branch is predictable, we may not want to merge them.
   uint64_t PTWeight, PFWeight;



More information about the llvm-commits mailing list