[llvm] [clang] [SimplifyCFG] Not folding branch in loop header with constant iterations (PR #74268)

via cfe-commits cfe-commits at lists.llvm.org
Sun Dec 3 18:17:42 PST 2023


https://github.com/xiangzh1 created https://github.com/llvm/llvm-project/pull/74268

A loop whose header compares against a constant bound can usually be optimized by loop unrolling. Folding the branch in such a loop header in SimplifyCFG breaks that unroll optimization.
For example:
 Skip folding "I < ConstNum" with "Cond2", because loops with a constant number of iterations can be easily optimized (e.g. unrolled).
 for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
  if (Cond2) {
     break;
   }
    xxx loop body;
  }
 Folding these conditional branches may break loop optimizations.

>From 9f3ff6f8e542e33d9bc63f628b658e93ffe8a687 Mon Sep 17 00:00:00 2001
From: Zhang Xiang <xiang.zhang at iluvatar.com>
Date: Sun, 3 Dec 2023 09:40:19 +0800
Subject: [PATCH] [SimplifyCFG] Not folding branch in loop header with constant
 iterations Loop header with constant usually can be optimized in unroll,
 folding branch in such loop header at SimplifyCFG will break unroll
 optimization.

---
 clang/test/CodeGenCUDA/simplify-cfg-unroll.cu | 364 ++++++++++++++++++
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  43 +++
 .../LoopVectorize/if-pred-non-void.ll         |  91 ++---
 3 files changed, 453 insertions(+), 45 deletions(-)
 create mode 100644 clang/test/CodeGenCUDA/simplify-cfg-unroll.cu

diff --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
new file mode 100644
index 0000000000000..ecb421f9fc85c
--- /dev/null
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -0,0 +1,364 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -O2 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
+// RUN:    -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
+// CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+// CHECK:       if.end:
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+// CHECK-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// CHECK-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+// CHECK-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_1:%.*]] = icmp eq i32 [[DIMS]], 1
+// CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+// CHECK:       if.end.1:
+// CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_129:%.*]] = add nsw i32 [[ADD14]], [[TMP10]]
+// CHECK-NEXT:    store i32 [[ADD14_129]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_1:%.*]] = add nsw i32 [[ADD14_1]], [[TMP11]]
+// CHECK-NEXT:    store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 2
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_1:%.*]] = add nsw i32 [[ADD14_2]], [[TMP12]]
+// CHECK-NEXT:    store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_1:%.*]] = add nsw i32 [[ADD14_3]], [[TMP13]]
+// CHECK-NEXT:    store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_2:%.*]] = icmp eq i32 [[DIMS]], 2
+// CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+// CHECK:       if.end.2:
+// CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+// CHECK-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_230:%.*]] = add nsw i32 [[ADD14_129]], [[TMP15]]
+// CHECK-NEXT:    store i32 [[ADD14_230]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 1
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_2:%.*]] = add nsw i32 [[ADD14_1_1]], [[TMP16]]
+// CHECK-NEXT:    store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 2
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_2:%.*]] = add nsw i32 [[ADD14_2_1]], [[TMP17]]
+// CHECK-NEXT:    store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 3
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_2:%.*]] = add nsw i32 [[ADD14_3_1]], [[TMP18]]
+// CHECK-NEXT:    store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_3:%.*]] = icmp eq i32 [[DIMS]], 3
+// CHECK-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+// CHECK:       if.end.3:
+// CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_331:%.*]] = add nsw i32 [[ADD14_230]], [[TMP20]]
+// CHECK-NEXT:    store i32 [[ADD14_331]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 1
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_3:%.*]] = add nsw i32 [[ADD14_1_2]], [[TMP21]]
+// CHECK-NEXT:    store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 2
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_3:%.*]] = add nsw i32 [[ADD14_2_2]], [[TMP22]]
+// CHECK-NEXT:    store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 3
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_3:%.*]] = add nsw i32 [[ADD14_3_2]], [[TMP23]]
+// CHECK-NEXT:    store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_4:%.*]] = icmp eq i32 [[DIMS]], 4
+// CHECK-NEXT:    br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+// CHECK:       if.end.4:
+// CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+// CHECK-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[ADD14_331]], [[TMP25]]
+// CHECK-NEXT:    store i32 [[ADD14_4]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 1
+// CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_4:%.*]] = add nsw i32 [[ADD14_1_3]], [[TMP26]]
+// CHECK-NEXT:    store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 2
+// CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_4:%.*]] = add nsw i32 [[ADD14_2_3]], [[TMP27]]
+// CHECK-NEXT:    store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 3
+// CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_4:%.*]] = add nsw i32 [[ADD14_3_3]], [[TMP28]]
+// CHECK-NEXT:    store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_5:%.*]] = icmp eq i32 [[DIMS]], 5
+// CHECK-NEXT:    br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+// CHECK:       if.end.5:
+// CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+// CHECK-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_5:%.*]] = add nsw i32 [[ADD14_4]], [[TMP30]]
+// CHECK-NEXT:    store i32 [[ADD14_5]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 1
+// CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_5:%.*]] = add nsw i32 [[ADD14_1_4]], [[TMP31]]
+// CHECK-NEXT:    store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 2
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_5:%.*]] = add nsw i32 [[ADD14_2_4]], [[TMP32]]
+// CHECK-NEXT:    store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 3
+// CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_5:%.*]] = add nsw i32 [[ADD14_3_4]], [[TMP33]]
+// CHECK-NEXT:    store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_6:%.*]] = icmp eq i32 [[DIMS]], 6
+// CHECK-NEXT:    br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+// CHECK:       if.end.6:
+// CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+// CHECK-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_6:%.*]] = add nsw i32 [[ADD14_5]], [[TMP35]]
+// CHECK-NEXT:    store i32 [[ADD14_6]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 1
+// CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_6:%.*]] = add nsw i32 [[ADD14_1_5]], [[TMP36]]
+// CHECK-NEXT:    store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 2
+// CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_6:%.*]] = add nsw i32 [[ADD14_2_5]], [[TMP37]]
+// CHECK-NEXT:    store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 3
+// CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_6:%.*]] = add nsw i32 [[ADD14_3_5]], [[TMP38]]
+// CHECK-NEXT:    store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_7:%.*]] = icmp eq i32 [[DIMS]], 7
+// CHECK-NEXT:    br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+// CHECK:       if.end.7:
+// CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+// CHECK-NEXT:    [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_7:%.*]] = add nsw i32 [[ADD14_6]], [[TMP40]]
+// CHECK-NEXT:    store i32 [[ADD14_7]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+// CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_7:%.*]] = add nsw i32 [[ADD14_1_6]], [[TMP41]]
+// CHECK-NEXT:    store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 2
+// CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_7:%.*]] = add nsw i32 [[ADD14_2_6]], [[TMP42]]
+// CHECK-NEXT:    store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 3
+// CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_7:%.*]] = add nsw i32 [[ADD14_3_6]], [[TMP43]]
+// CHECK-NEXT:    store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_8:%.*]] = icmp eq i32 [[DIMS]], 8
+// CHECK-NEXT:    br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+// CHECK:       if.end.8:
+// CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+// CHECK-NEXT:    [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_8:%.*]] = add nsw i32 [[ADD14_7]], [[TMP45]]
+// CHECK-NEXT:    store i32 [[ADD14_8]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i64 1
+// CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_8:%.*]] = add nsw i32 [[ADD14_1_7]], [[TMP46]]
+// CHECK-NEXT:    store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i64 2
+// CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_8:%.*]] = add nsw i32 [[ADD14_2_7]], [[TMP47]]
+// CHECK-NEXT:    store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i64 3
+// CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_8:%.*]] = add nsw i32 [[ADD14_3_7]], [[TMP48]]
+// CHECK-NEXT:    store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_9:%.*]] = icmp eq i32 [[DIMS]], 9
+// CHECK-NEXT:    br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+// CHECK:       if.end.9:
+// CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+// CHECK-NEXT:    [[TMP49:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_9:%.*]] = add nsw i32 [[ADD14_8]], [[TMP50]]
+// CHECK-NEXT:    store i32 [[ADD14_9]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP49]], i64 1
+// CHECK-NEXT:    [[TMP51:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_9:%.*]] = add nsw i32 [[ADD14_1_8]], [[TMP51]]
+// CHECK-NEXT:    store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP49]], i64 2
+// CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_9:%.*]] = add nsw i32 [[ADD14_2_8]], [[TMP52]]
+// CHECK-NEXT:    store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP49]], i64 3
+// CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_9:%.*]] = add nsw i32 [[ADD14_3_8]], [[TMP53]]
+// CHECK-NEXT:    store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_10:%.*]] = icmp eq i32 [[DIMS]], 10
+// CHECK-NEXT:    br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+// CHECK:       if.end.10:
+// CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+// CHECK-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_10:%.*]] = add nsw i32 [[ADD14_9]], [[TMP55]]
+// CHECK-NEXT:    store i32 [[ADD14_10]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 1
+// CHECK-NEXT:    [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_10:%.*]] = add nsw i32 [[ADD14_1_9]], [[TMP56]]
+// CHECK-NEXT:    store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+// CHECK-NEXT:    [[TMP57:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_10:%.*]] = add nsw i32 [[ADD14_2_9]], [[TMP57]]
+// CHECK-NEXT:    store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 3
+// CHECK-NEXT:    [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_10:%.*]] = add nsw i32 [[ADD14_3_9]], [[TMP58]]
+// CHECK-NEXT:    store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_11:%.*]] = icmp eq i32 [[DIMS]], 11
+// CHECK-NEXT:    br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+// CHECK:       if.end.11:
+// CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+// CHECK-NEXT:    [[TMP59:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_11:%.*]] = add nsw i32 [[ADD14_10]], [[TMP60]]
+// CHECK-NEXT:    store i32 [[ADD14_11]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 1
+// CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_11:%.*]] = add nsw i32 [[ADD14_1_10]], [[TMP61]]
+// CHECK-NEXT:    store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 2
+// CHECK-NEXT:    [[TMP62:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_11:%.*]] = add nsw i32 [[ADD14_2_10]], [[TMP62]]
+// CHECK-NEXT:    store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 3
+// CHECK-NEXT:    [[TMP63:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_11:%.*]] = add nsw i32 [[ADD14_3_10]], [[TMP63]]
+// CHECK-NEXT:    store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_12:%.*]] = icmp eq i32 [[DIMS]], 12
+// CHECK-NEXT:    br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+// CHECK:       if.end.12:
+// CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+// CHECK-NEXT:    [[TMP64:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_12:%.*]] = add nsw i32 [[ADD14_11]], [[TMP65]]
+// CHECK-NEXT:    store i32 [[ADD14_12]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 1
+// CHECK-NEXT:    [[TMP66:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_12:%.*]] = add nsw i32 [[ADD14_1_11]], [[TMP66]]
+// CHECK-NEXT:    store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 2
+// CHECK-NEXT:    [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_12:%.*]] = add nsw i32 [[ADD14_2_11]], [[TMP67]]
+// CHECK-NEXT:    store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 3
+// CHECK-NEXT:    [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_12:%.*]] = add nsw i32 [[ADD14_3_11]], [[TMP68]]
+// CHECK-NEXT:    store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_13:%.*]] = icmp eq i32 [[DIMS]], 13
+// CHECK-NEXT:    br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+// CHECK:       if.end.13:
+// CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+// CHECK-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_13:%.*]] = add nsw i32 [[ADD14_12]], [[TMP70]]
+// CHECK-NEXT:    store i32 [[ADD14_13]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 1
+// CHECK-NEXT:    [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_13:%.*]] = add nsw i32 [[ADD14_1_12]], [[TMP71]]
+// CHECK-NEXT:    store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 2
+// CHECK-NEXT:    [[TMP72:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_13:%.*]] = add nsw i32 [[ADD14_2_12]], [[TMP72]]
+// CHECK-NEXT:    store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+// CHECK-NEXT:    [[TMP73:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_13:%.*]] = add nsw i32 [[ADD14_3_12]], [[TMP73]]
+// CHECK-NEXT:    store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_14:%.*]] = icmp eq i32 [[DIMS]], 14
+// CHECK-NEXT:    br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+// CHECK:       if.end.14:
+// CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+// CHECK-NEXT:    [[TMP74:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_14:%.*]] = add nsw i32 [[ADD14_13]], [[TMP75]]
+// CHECK-NEXT:    store i32 [[ADD14_14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP74]], i64 1
+// CHECK-NEXT:    [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_14:%.*]] = add nsw i32 [[ADD14_1_13]], [[TMP76]]
+// CHECK-NEXT:    store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP74]], i64 2
+// CHECK-NEXT:    [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_14:%.*]] = add nsw i32 [[ADD14_2_13]], [[TMP77]]
+// CHECK-NEXT:    store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP74]], i64 3
+// CHECK-NEXT:    [[TMP78:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_14:%.*]] = add nsw i32 [[ADD14_3_13]], [[TMP78]]
+// CHECK-NEXT:    store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[CMP1_15:%.*]] = icmp eq i32 [[DIMS]], 15
+// CHECK-NEXT:    br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+// CHECK:       if.end.15:
+// CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+// CHECK-NEXT:    [[TMP79:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_15:%.*]] = add nsw i32 [[ADD14_14]], [[TMP80]]
+// CHECK-NEXT:    store i32 [[ADD14_15]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i64 1
+// CHECK-NEXT:    [[TMP81:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_15:%.*]] = add nsw i32 [[ADD14_1_14]], [[TMP81]]
+// CHECK-NEXT:    store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i64 2
+// CHECK-NEXT:    [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_15:%.*]] = add nsw i32 [[ADD14_2_14]], [[TMP82]]
+// CHECK-NEXT:    store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i64 3
+// CHECK-NEXT:    [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_15:%.*]] = add nsw i32 [[ADD14_3_14]], [[TMP83]]
+// CHECK-NEXT:    store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    br label [[CLEANUP]]
+// CHECK:       cleanup:
+// CHECK-NEXT:    ret void
+//
+__device__ void func(int Idx, int *Arr[], int Dims, int *Out) { // test input for the CHECK lines above
+  #pragma unroll
+  for (int Dim = 0; Dim < 16; ++Dim) { // header test against a constant bound (16)
+    if (Dim == Dims) {                 // early exit that depends on the Dims argument
+      break;
+    }
+    int divmod = Arr[Dim][Idx];
+    Idx = divmod + 1;
+
+    #pragma unroll
+    for (int arg = 0; arg < 4; arg++) { // inner loop, also with a constant bound (4)
+      Out[arg] += Arr[Dim][arg];        // accumulate row Dim of Arr into Out[0..3]
+    }
+  }
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+//.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c09cf9c2325c4..f3ca068724b78 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3634,6 +3634,46 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
   }
 }
 
+// Returns true if PBI terminates a block that looks like the header of a loop
+// with a constant bound, e.g. the "I < ConstNum" test (ConstNum > 1) in:
+//   for (int I = 0; I < ConstNum; ++I) {
+//     if (Cond2)
+//       break;
+//     ... loop body ...
+//   }
+// Folding "I < ConstNum" with "Cond2" may break later loop optimizations
+// (e.g. unrolling), so the caller skips folding when this returns true.
+//
+// Assumes PBI is a conditional branch (getCondition() is called directly).
+static bool isLoopHeaderWithConstantCond(BranchInst *PBI) {
+  auto *ICmp = dyn_cast<ICmpInst>(PBI->getCondition());
+  if (!ICmp)
+    return false;
+
+  // Require some operand to be a constant that is > 1 as a signed value.
+  // Compare on the APInt itself: getSExtValue() asserts when the value needs
+  // more than 64 bits, which wide (e.g. i128) constants can.
+  bool HasConstBound = llvm::any_of(ICmp->operands(), [](const Use &U) {
+    auto *C = dyn_cast<ConstantInt>(U.get());
+    return C && C->getValue().sgt(1);
+  });
+  if (!HasConstBound)
+    return false;
+
+  // Loop information has not been established yet, so approximate "loop
+  // header" by a backedge test: PBB counts as a header if one of its
+  // predecessors is PBB itself or a block laid out after it in the function.
+  // NOTE(review): this depends on block layout order, not on dominance, so
+  // it is only a heuristic.
+  BasicBlock *PBB = PBI->getParent();
+  for (Function::iterator I = PBB->getIterator(), E = PBB->getParent()->end();
+       I != E; ++I) {
+    if (is_contained(predecessors(PBB), &*I))
+      return true;
+  }
+  return false;
+}
+
 /// Determine if the two branches share a common destination and deduce a glue
 /// that joins the branches' conditions to arrive at the common destination if
 /// that would be profitable.
@@ -3645,6 +3685,9 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
   assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
          "PredBB must be a predecessor of BB.");
 
+  if (isLoopHeaderWithConstantCond(PBI))
+    return std::nullopt;
+
   // We have the potential to fold the conditions together, but if the
   // predecessor branch is predictable, we may not want to merge them.
   uint64_t PTWeight, PFWeight;
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index e7e7c58b5d29d..1d85db43a5ccb 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -46,13 +46,13 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope !12, !noalias !13
+; CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
+; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META15]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], <i32 24, i32 24>
 ; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], <i32 25, i32 25>
@@ -114,13 +114,13 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; CHECK-NEXT:    [[PREDPHI29:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP11]], <2 x i32> [[TMP54]]
 ; CHECK-NEXT:    [[PREDPHI30:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP12]], <2 x i32> [[TMP55]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope [[META5]], !noalias [[META8]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope !12, !noalias !13
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope [[META12]], !noalias [[META13]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope !14, !noalias !15
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope [[META14]], !noalias [[META15]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope !15
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope [[META15]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP61]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -207,14 +207,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP1]]
 ; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope !15
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope !15
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META15]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META15]]
 ; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = add nsw i32 [[TMP10]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = add nsw i32 [[TMP11]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = add nsw i32 [[TMP12]], 24
@@ -259,14 +259,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI29:%.*]] = select i1 [[TMP45]], i32 [[TMP23]], i32 [[TMP42]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI30:%.*]] = select i1 [[TMP44]], i32 [[TMP24]], i32 [[TMP35]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI31:%.*]] = select i1 [[TMP45]], i32 [[TMP25]], i32 [[TMP43]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope !15
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope [[META15]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -371,10 +371,10 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !23
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META23]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
@@ -406,7 +406,7 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope !20, !noalias !23
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -450,12 +450,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !23
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !23
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META23]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100
@@ -480,8 +480,8 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP13]], true
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[TMP17]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[TMP21]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META20]], !noalias [[META23]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -556,10 +556,10 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !32
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META32]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], <i32 200, i32 200>
@@ -596,7 +596,7 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP5]], <2 x i32> [[TMP28]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope !29, !noalias !32
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP32]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
@@ -610,15 +610,16 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[LSD_B:%.*]] = load i32, ptr [[ISD_B]], align 4
 ; CHECK-NEXT:    [[PSD:%.*]] = add nsw i32 [[LSD]], 23
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LSD]], 100
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[CHECKBB:%.*]], !dbg [[DBG34]]
+; CHECK:       checkbb:
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[LSD]], 200
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP1]], [[CMP2]], !dbg [[DBG34]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[IF_END]], !dbg [[DBG34]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN]], label [[IF_END]], !dbg [[DBG35]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[SD1:%.*]] = sdiv i32 [[PSD]], [[LSD]]
 ; CHECK-NEXT:    [[RSD:%.*]] = sdiv i32 [[LSD_B]], [[SD1]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[YSD_0:%.*]] = phi i32 [ [[RSD]], [[IF_THEN]] ], [ [[PSD]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[YSD_0:%.*]] = phi i32 [ [[RSD]], [[IF_THEN]] ], [ [[PSD]], [[CHECKBB]] ]
 ; CHECK-NEXT:    store i32 [[YSD_0]], ptr [[ISD]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
@@ -642,12 +643,12 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !32
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !32
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META32]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100
@@ -682,8 +683,8 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    [[TMP33:%.*]] = select i1 [[TMP17]], i1 [[TMP31]], i1 false, !dbg [[DBG35]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP10]], i32 [[TMP25]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP11]], i32 [[TMP29]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META29]], !noalias [[META32]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]



More information about the cfe-commits mailing list