[clang] [llvm] [LoopUnroll] Add flag to enforce loop unroll pragma regardless of expensive trip count (PR #180961)
Adel Ejjeh via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 11 08:20:50 PST 2026
https://github.com/adelejjeh updated https://github.com/llvm/llvm-project/pull/180961
>From 72e234d5047d9a7f62c606d7e30f234354022bf5 Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Wed, 11 Feb 2026 09:42:23 -0600
Subject: [PATCH] Add flag to enforce loop unroll pragma regardless of
expensive trip count
Co-authored-by: Carlo Bertolli <carlo.bertolli at amd.com>
---
clang/include/clang/Basic/CodeGenOptions.def | 1 +
clang/include/clang/Options/Options.td | 2 +
clang/lib/CodeGen/CGLoopInfo.cpp | 14 +
clang/lib/CodeGen/CGLoopInfo.h | 3 +
clang/lib/Frontend/CompilerInvocation.cpp | 5 +-
clang/test/CodeGen/force-unroll-pragma.c | 458 +++++++++++++++++
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 17 +-
.../LoopUnroll/expensive-tripcount.ll | 474 ++++++++++++++++++
8 files changed, 969 insertions(+), 5 deletions(-)
create mode 100644 clang/test/CodeGen/force-unroll-pragma.c
create mode 100644 llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 8c056bb690690..1ff70ca69da23 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -339,6 +339,7 @@ VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granul
CODEGENOPT(InterchangeLoops , 1, 0, Benign) ///< Run loop-interchange.
CODEGENOPT(FuseLoops , 1, 0, Benign) ///< Run loop-fusion.
CODEGENOPT(UnrollLoops , 1, 0, Benign) ///< Control whether loops are unrolled.
+CODEGENOPT(ForceUnrollPragma , 1, 0, Benign) ///< Force unroll runtime loops when pragma provided.
CODEGENOPT(RerollLoops , 1, 0, Benign) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0, Benign) ///< Set when -fno-jump-tables is enabled.
VALUE_CODEGENOPT(UnwindTables, 2, 0, Benign) ///< Unwind tables (1, Benign) or asynchronous unwind tables (2, Benign)
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 155f19fb00bd8..09a4219d0f378 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4492,6 +4492,8 @@ def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
+def force_unroll_pragma : Flag<["-"], "force-unroll-pragma">, Group<f_Group>,
+ HelpText<"Force unroll runtime loops when an unroll pragma is provided">, Visibility<[ClangOption, CC1Option]>;
def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>,
HelpText<"Assume all non-trivial loops are finite.">, Visibility<[ClangOption, CC1Option]>;
def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index b2b569a43038c..93486a65de22d 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -122,6 +122,13 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ // Emit metadata to allow expensive trip count if ForceUnrollPragma is set.
+ // Meant for pragmas without an explicit count (not checked at this point).
+ if (Attrs.ForceUnrollPragma) {
+ Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.runtime.force")};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
if (FollowupHasTransforms)
Args.push_back(
createFollowupMetadata("llvm.loop.unroll.followup_all", Followup));
@@ -821,6 +828,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
StagedAttrs.UnrollCount == 0))
setUnrollState(LoopAttributes::Disable);
+ // Propagate -force-unroll-pragma to the staged attributes. No pragma check
+ // is done here, although the option targets unroll pragmas without a count
+ // (pragmas with explicit counts already enable expensive trip count).
+ if (CGOpts.ForceUnrollPragma) {
+ StagedAttrs.ForceUnrollPragma = true;
+ }
+
/// Stage the attributes.
push(Header, StartLoc, EndLoc);
}
diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h
index 3c57124f4137c..e8ec8af55a616 100644
--- a/clang/lib/CodeGen/CGLoopInfo.h
+++ b/clang/lib/CodeGen/CGLoopInfo.h
@@ -84,6 +84,9 @@ struct LoopAttributes {
/// Value for whether the loop is required to make progress.
bool MustProgress;
+
+ /// Value for whether to force unroll pragma even with expensive trip count.
+ bool ForceUnrollPragma = false;
};
/// Information used when generating a structured loop.
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 6aa2afb6f5918..005d1ae47b1a5 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1603,7 +1603,8 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
GenerateArg(Consumer, OPT_funroll_loops);
else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1)
GenerateArg(Consumer, OPT_fno_unroll_loops);
-
+ if (Opts.ForceUnrollPragma)
+ GenerateArg(Consumer, OPT_force_unroll_pragma);
if (Opts.InterchangeLoops)
GenerateArg(Consumer, OPT_floop_interchange);
else
@@ -1921,6 +1922,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.UnrollLoops =
Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
(Opts.OptimizationLevel > 1));
+ Opts.ForceUnrollPragma = Args.hasFlag(
+ OPT_force_unroll_pragma, /*OPT_fno_force_unroll_pragma*/ {}, false);
Opts.InterchangeLoops =
Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion,
diff --git a/clang/test/CodeGen/force-unroll-pragma.c b/clang/test/CodeGen/force-unroll-pragma.c
new file mode 100644
index 0000000000000..c129a51c97341
--- /dev/null
+++ b/clang/test/CodeGen/force-unroll-pragma.c
@@ -0,0 +1,458 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOPRAGMA-NOFLAG
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -force-unroll-pragma %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOPRAGMA-FLAG
+// RUN: %clang_cc1 -DPRAGMA_UNROLL -triple x86_64-unknown-linux-gnu -O2 %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-PRAGMA-NOFLAG
+// RUN: %clang_cc1 -DPRAGMA_UNROLL -triple x86_64-unknown-linux-gnu -O2 -force-unroll-pragma %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-PRAGMA-FLAG
+
+const int output_vec_size = 4;
+struct ArgVec {
+ float v[output_vec_size];
+};
+
+// CHECK-LABEL: define dso_local i32 @calc_offset(
+// CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OFF1]], [[INPUT_OFFSET]]
+// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[OFF2]]
+// CHECK-NEXT: ret i32 [[ADD1]]
+//
+int calc_offset(int input_offset, int off1, int off2) {
+ return input_offset + off1 + off2;
+}
+
+
+// CHECK-NOPRAGMA-NOFLAG-LABEL: define dso_local void @complex_loop(
+// CHECK-NOPRAGMA-NOFLAG-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ENTRY:.*:]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+// CHECK-NOPRAGMA-NOFLAG: [[FOR_BODY_LR_PH]]:
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: br label %[[FOR_BODY:.*]]
+// CHECK-NOPRAGMA-NOFLAG: [[FOR_BODY]]:
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
+// CHECK-NOPRAGMA-NOFLAG-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-NOPRAGMA-NOFLAG: [[FOR_END14]]:
+// CHECK-NOPRAGMA-NOFLAG-NEXT: ret void
+//
+// CHECK-NOPRAGMA-FLAG-LABEL: define dso_local void @complex_loop(
+// CHECK-NOPRAGMA-FLAG-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ENTRY:.*:]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+// CHECK-NOPRAGMA-FLAG-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+// CHECK-NOPRAGMA-FLAG: [[FOR_BODY_LR_PH]]:
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+// CHECK-NOPRAGMA-FLAG-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+// CHECK-NOPRAGMA-FLAG-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+// CHECK-NOPRAGMA-FLAG-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+// CHECK-NOPRAGMA-FLAG-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: br label %[[FOR_BODY:.*]]
+// CHECK-NOPRAGMA-FLAG: [[FOR_BODY]]:
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+// CHECK-NOPRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]]
+// CHECK-NOPRAGMA-FLAG-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]]
+// CHECK-NOPRAGMA-FLAG-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]]
+// CHECK-NOPRAGMA-FLAG-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]]
+// CHECK-NOPRAGMA-FLAG-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+// CHECK-NOPRAGMA-FLAG-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
+// CHECK-NOPRAGMA-FLAG-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-NOPRAGMA-FLAG: [[FOR_END14]]:
+// CHECK-NOPRAGMA-FLAG-NEXT: ret void
+//
+// CHECK-PRAGMA-NOFLAG-LABEL: define dso_local void @complex_loop(
+// CHECK-PRAGMA-NOFLAG-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ENTRY:.*:]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+// CHECK-PRAGMA-NOFLAG-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+// CHECK-PRAGMA-NOFLAG: [[FOR_BODY_LR_PH]]:
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+// CHECK-PRAGMA-NOFLAG-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+// CHECK-PRAGMA-NOFLAG-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+// CHECK-PRAGMA-NOFLAG-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+// CHECK-PRAGMA-NOFLAG-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: br label %[[FOR_BODY:.*]]
+// CHECK-PRAGMA-NOFLAG: [[FOR_BODY]]:
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+// CHECK-PRAGMA-NOFLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]]
+// CHECK-PRAGMA-NOFLAG-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]]
+// CHECK-PRAGMA-NOFLAG-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]]
+// CHECK-PRAGMA-NOFLAG-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]]
+// CHECK-PRAGMA-NOFLAG-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+// CHECK-PRAGMA-NOFLAG-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
+// CHECK-PRAGMA-NOFLAG-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-PRAGMA-NOFLAG: [[FOR_END14]]:
+// CHECK-PRAGMA-NOFLAG-NEXT: ret void
+//
+// CHECK-PRAGMA-FLAG-LABEL: define dso_local void @complex_loop(
+// CHECK-PRAGMA-FLAG-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-PRAGMA-FLAG-NEXT: [[ENTRY:.*:]]
+// CHECK-PRAGMA-FLAG-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+// CHECK-PRAGMA-FLAG-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+// CHECK-PRAGMA-FLAG: [[FOR_BODY_LR_PH]]:
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], [[TMP0]]
+// CHECK-PRAGMA-FLAG-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 [[TMP2]])
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP3]], [[TMP2]]
+// CHECK-PRAGMA-FLAG-NEXT: [[UMIN:%.*]] = zext i1 [[TMP4]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP3]], [[UMIN]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP6:%.*]] = sub i64 [[SMAX]], [[TMP5]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[UMIN]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1
+// CHECK-PRAGMA-FLAG-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP9]], 7
+// CHECK-PRAGMA-FLAG-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+// CHECK-PRAGMA-FLAG-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], label %[[FOR_BODY_PROL:.*]]
+// CHECK-PRAGMA-FLAG: [[FOR_BODY_PROL]]:
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP10:%.*]] = phi float [ [[ADD_3_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP11:%.*]] = phi float [ [[ADD_2_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP12:%.*]] = phi float [ [[ADD_1_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP13:%.*]] = phi float [ [[ADD_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[TMP0]], %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ], [ 0, %[[FOR_BODY_LR_PH]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP14:%.*]] = trunc nsw i64 [[INDVARS_IV_PROL]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_PROL:%.*]] = add i32 [[ADD_I]], [[TMP14]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_PROL:%.*]] = sext i32 [[ADD1_I_PROL]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_PROL]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_PROL:%.*]] = load float, ptr [[ARRAYIDX_PROL]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_PROL]] = fadd float [[TMP13]], [[NEXT_SROA_0_0_COPYLOAD_PROL]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_PROL]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_PROL]] = fadd float [[TMP12]], [[NEXT_SROA_4_0_COPYLOAD_PROL]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_PROL]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_PROL]] = fadd float [[TMP11]], [[NEXT_SROA_5_0_COPYLOAD_PROL]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_PROL]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_PROL]] = fadd float [[TMP10]], [[NEXT_SROA_6_0_COPYLOAD_PROL]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_PROL]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+// CHECK-PRAGMA-FLAG-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+// CHECK-PRAGMA-FLAG-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-PRAGMA-FLAG: [[FOR_BODY_PROL_LOOPEXIT]]:
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTUNR:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3_PROL]], %[[FOR_BODY_PROL]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTUNR30:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2_PROL]], %[[FOR_BODY_PROL]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTUNR31:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1_PROL]], %[[FOR_BODY_PROL]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[DOTUNR32:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP8]], 7
+// CHECK-PRAGMA-FLAG-NEXT: br i1 [[TMP15]], label %[[FOR_END14]], label %[[FOR_BODY:.*]]
+// CHECK-PRAGMA-FLAG: [[FOR_BODY]]:
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP16:%.*]] = phi float [ [[ADD_3_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP17:%.*]] = phi float [ [[ADD_2_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR30]], %[[FOR_BODY_PROL_LOOPEXIT]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP18:%.*]] = phi float [ [[ADD_1_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR31]], %[[FOR_BODY_PROL_LOOPEXIT]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP19:%.*]] = phi float [ [[ADD_7:%.*]], %[[FOR_BODY]] ], [ [[DOTUNR32]], %[[FOR_BODY_PROL_LOOPEXIT]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PROL_LOOPEXIT]] ]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP20:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP20]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD:%.*]] = fadd float [[TMP19]], [[NEXT_SROA_0_0_COPYLOAD]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1:%.*]] = fadd float [[TMP18]], [[NEXT_SROA_4_0_COPYLOAD]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2:%.*]] = fadd float [[TMP17]], [[NEXT_SROA_5_0_COPYLOAD]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3:%.*]] = fadd float [[TMP16]], [[NEXT_SROA_6_0_COPYLOAD]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP21:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_1:%.*]] = add i32 [[ADD_I]], [[TMP21]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_1:%.*]] = sext i32 [[ADD1_I_1]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_133:%.*]] = fadd float [[ADD]], [[NEXT_SROA_0_0_COPYLOAD_1]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_133]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_1:%.*]] = fadd float [[ADD_1]], [[NEXT_SROA_4_0_COPYLOAD_1]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_1:%.*]] = fadd float [[ADD_2]], [[NEXT_SROA_5_0_COPYLOAD_1]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_1]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_1:%.*]] = fadd float [[ADD_3]], [[NEXT_SROA_6_0_COPYLOAD_1]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_1]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP22:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_1]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_2:%.*]] = add i32 [[ADD_I]], [[TMP22]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_2:%.*]] = sext i32 [[ADD1_I_2]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_2]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_234:%.*]] = fadd float [[ADD_133]], [[NEXT_SROA_0_0_COPYLOAD_2]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_234]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_2:%.*]] = fadd float [[ADD_1_1]], [[NEXT_SROA_4_0_COPYLOAD_2]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_2]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_2:%.*]] = fadd float [[ADD_2_1]], [[NEXT_SROA_5_0_COPYLOAD_2]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_2:%.*]] = fadd float [[ADD_3_1]], [[NEXT_SROA_6_0_COPYLOAD_2]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_2]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP23:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_2]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_3:%.*]] = add i32 [[ADD_I]], [[TMP23]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_3:%.*]] = sext i32 [[ADD1_I_3]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_3]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_335:%.*]] = fadd float [[ADD_234]], [[NEXT_SROA_0_0_COPYLOAD_3]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_335]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_3:%.*]] = fadd float [[ADD_1_2]], [[NEXT_SROA_4_0_COPYLOAD_3]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_3]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_3:%.*]] = fadd float [[ADD_2_2]], [[NEXT_SROA_5_0_COPYLOAD_3]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_3]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_3:%.*]] = fadd float [[ADD_3_2]], [[NEXT_SROA_6_0_COPYLOAD_3]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP24:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_3]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_4:%.*]] = add i32 [[ADD_I]], [[TMP24]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_4:%.*]] = sext i32 [[ADD1_I_4]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_4]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_4:%.*]] = fadd float [[ADD_335]], [[NEXT_SROA_0_0_COPYLOAD_4]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_4]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_4:%.*]] = fadd float [[ADD_1_3]], [[NEXT_SROA_4_0_COPYLOAD_4]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_4]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_4:%.*]] = fadd float [[ADD_2_3]], [[NEXT_SROA_5_0_COPYLOAD_4]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_4]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_4:%.*]] = fadd float [[ADD_3_3]], [[NEXT_SROA_6_0_COPYLOAD_4]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_4]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP25:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_4]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_5:%.*]] = add i32 [[ADD_I]], [[TMP25]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_5:%.*]] = sext i32 [[ADD1_I_5]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_5]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_5:%.*]] = fadd float [[ADD_4]], [[NEXT_SROA_0_0_COPYLOAD_5]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_5]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_5:%.*]] = fadd float [[ADD_1_4]], [[NEXT_SROA_4_0_COPYLOAD_5]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_5]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_5:%.*]] = fadd float [[ADD_2_4]], [[NEXT_SROA_5_0_COPYLOAD_5]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_5]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_5:%.*]] = fadd float [[ADD_3_4]], [[NEXT_SROA_6_0_COPYLOAD_5]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_5]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP26:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_5]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_6:%.*]] = add i32 [[ADD_I]], [[TMP26]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_6:%.*]] = sext i32 [[ADD1_I_6]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_6:%.*]] = fadd float [[ADD_5]], [[NEXT_SROA_0_0_COPYLOAD_6]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_6]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_6:%.*]] = fadd float [[ADD_1_5]], [[NEXT_SROA_4_0_COPYLOAD_6]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_6]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_6:%.*]] = fadd float [[ADD_2_5]], [[NEXT_SROA_5_0_COPYLOAD_6]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_6]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_6:%.*]] = fadd float [[ADD_3_5]], [[NEXT_SROA_6_0_COPYLOAD_6]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_6]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[TMP27:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_6]] to i32
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD1_I_7:%.*]] = add i32 [[ADD_I]], [[TMP27]]
+// CHECK-PRAGMA-FLAG-NEXT: [[IDXPROM_7:%.*]] = sext i32 [[ADD1_I_7]] to i64
+// CHECK-PRAGMA-FLAG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_7]]
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_0_0_COPYLOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_4_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 8
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_5_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7]], align 4
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 12
+// CHECK-PRAGMA-FLAG-NEXT: [[NEXT_SROA_6_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7]], align 4, !tbaa [[CHAR_TBAA8]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_7]] = fadd float [[ADD_6]], [[NEXT_SROA_0_0_COPYLOAD_7]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_7]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_1_7]] = fadd float [[ADD_1_6]], [[NEXT_SROA_4_0_COPYLOAD_7]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_1_7]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_2_7]] = fadd float [[ADD_2_6]], [[NEXT_SROA_5_0_COPYLOAD_7]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_2_7]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[ADD_3_7]] = fadd float [[ADD_3_6]], [[NEXT_SROA_6_0_COPYLOAD_7]]
+// CHECK-PRAGMA-FLAG-NEXT: store float [[ADD_3_7]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+// CHECK-PRAGMA-FLAG-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[TMP1]]
+// CHECK-PRAGMA-FLAG-NEXT: [[CMP_7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_7]], [[TMP2]]
+// CHECK-PRAGMA-FLAG-NEXT: br i1 [[CMP_7]], label %[[FOR_BODY]], label %[[FOR_END14]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK-PRAGMA-FLAG: [[FOR_END14]]:
+// CHECK-PRAGMA-FLAG-NEXT: ret void
+//
+void complex_loop(int input_offset, int step, int n, int off1, int off2, const struct ArgVec* reduce_buffer, struct ArgVec* value) {
+#ifdef PRAGMA_UNROLL
+ #pragma unroll
+#endif
+ for (; input_offset < n; input_offset += step) {
+ int idx = calc_offset(input_offset, off1, off2);
+ struct ArgVec next = reduce_buffer[idx];
+ #pragma unroll
+ for (int i = 0; i < output_vec_size; i++) {
+ value->v[i] = value->v[i] + next.v[i];
+ }
+ }
+}
+
+//.
+// CHECK-NOPRAGMA-NOFLAG: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-NOPRAGMA-NOFLAG: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-NOPRAGMA-NOFLAG: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-NOPRAGMA-NOFLAG: [[META7]] = !{!"float", [[META4]], i64 0}
+// CHECK-NOPRAGMA-NOFLAG: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+// CHECK-NOPRAGMA-NOFLAG: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
+// CHECK-NOPRAGMA-NOFLAG: [[META10]] = !{!"llvm.loop.mustprogress"}
+//.
+// CHECK-NOPRAGMA-FLAG: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-NOPRAGMA-FLAG: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-NOPRAGMA-FLAG: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-NOPRAGMA-FLAG: [[META7]] = !{!"float", [[META4]], i64 0}
+// CHECK-NOPRAGMA-FLAG: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+// CHECK-NOPRAGMA-FLAG: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
+// CHECK-NOPRAGMA-FLAG: [[META10]] = !{!"llvm.loop.mustprogress"}
+//.
+// CHECK-PRAGMA-NOFLAG: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-PRAGMA-NOFLAG: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-PRAGMA-NOFLAG: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-PRAGMA-NOFLAG: [[META7]] = !{!"float", [[META4]], i64 0}
+// CHECK-PRAGMA-NOFLAG: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+// CHECK-PRAGMA-NOFLAG: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+// CHECK-PRAGMA-NOFLAG: [[META10]] = !{!"llvm.loop.mustprogress"}
+// CHECK-PRAGMA-NOFLAG: [[META11]] = !{!"llvm.loop.unroll.enable"}
+//.
+// CHECK-PRAGMA-FLAG: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK-PRAGMA-FLAG: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK-PRAGMA-FLAG: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK-PRAGMA-FLAG: [[META7]] = !{!"float", [[META4]], i64 0}
+// CHECK-PRAGMA-FLAG: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+// CHECK-PRAGMA-FLAG: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
+// CHECK-PRAGMA-FLAG: [[META10]] = !{!"llvm.loop.unroll.disable"}
+// CHECK-PRAGMA-FLAG: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]], [[META10]]}
+// CHECK-PRAGMA-FLAG: [[META12]] = !{!"llvm.loop.mustprogress"}
+//.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 6050650eb937c..06d8c2b12c90c 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -330,13 +330,14 @@ struct EstimatedUnrollCost {
};
struct PragmaInfo {
- PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
+ PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU, bool FPU)
: UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
- PragmaEnableUnroll(PEU) {}
+ PragmaEnableUnroll(PEU), ForcePragmaUnroll(FPU) {}
const bool UserUnrollCount;
const bool PragmaFullUnroll;
const unsigned PragmaCount;
const bool PragmaEnableUnroll;
+ const bool ForcePragmaUnroll;
};
} // end anonymous namespace
@@ -762,6 +763,12 @@ static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
}
+// Returns true if the loop has a metadata flag to allow expensive trip counts
+// when unrolling with a pragma.
+static bool hasRuntimeForceUnroll(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.force");
+}
+
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned unrollCountPragmaValue(const Loop *L) {
@@ -937,12 +944,13 @@ bool llvm::computeUnrollCount(
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
const unsigned PragmaCount = unrollCountPragmaValue(L);
const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ const bool ForcePragmaUnroll = hasRuntimeForceUnroll(L);
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
- PragmaEnableUnroll);
+ PragmaEnableUnroll, ForcePragmaUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@@ -1102,7 +1110,8 @@ bool llvm::computeUnrollCount(
}
if (UP.Count == 0)
UP.Count = UP.DefaultUnrollRuntimeCount;
-
+ if (PragmaEnableUnroll && ForcePragmaUnroll)
+ UP.AllowExpensiveTripCount = true;
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
while (UP.Count != 0 &&
diff --git a/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
new file mode 100644
index 0000000000000..bdae391f2c0f1
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s
+; Checks that loops with expensive trip counts are runtime-unrolled when the llvm.loop.unroll.runtime.force metadata is present.
+; The first loop should be unrolled, while the second loop should not be unrolled.
+
+; ModuleID = 'force-unroll-pragma.c'
+source_filename = "force-unroll-pragma.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.ArgVec = type { [4 x float] }
+
+@output_vec_size = local_unnamed_addr constant i32 4, align 4
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @calc_offset(i32 noundef %input_offset, i32 noundef %off1, i32 noundef %off2) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local i32 @calc_offset(
+; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OFF1]], [[INPUT_OFFSET]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[OFF2]]
+; CHECK-NEXT: ret i32 [[ADD1]]
+;
+entry:
+ %add = add nsw i32 %off1, %input_offset
+ %add1 = add nsw i32 %add, %off2
+ ret i32 %add1
+}
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @complex_loop_unroll(i32 noundef %input_offset, i32 noundef %step, i32 noundef %n, i32 noundef %off1, i32 noundef %off2, ptr noundef readonly captures(none) %reduce_buffer, ptr noundef captures(none) %value) local_unnamed_addr #1 {
+; CHECK-LABEL: define dso_local void @complex_loop_unroll(
+; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+; CHECK-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6:![0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+; CHECK-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+; CHECK-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+; CHECK-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP3]], i64 [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[SMAX]], [[TMP3]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 1)
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[SMAX]], [[UMIN]]
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[UMIN]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP9]], 7
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER:.*]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]]
+; CHECK: [[FOR_BODY_PROL_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]]
+; CHECK: [[FOR_BODY_PROL]]:
+; CHECK-NEXT: [[TMP10:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_3_PROL:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_2_PROL:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_1_PROL:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[ADD_PROL:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc nsw i64 [[INDVARS_IV_PROL]] to i32
+; CHECK-NEXT: [[ADD1_I_PROL:%.*]] = add i32 [[ADD_I]], [[TMP14]]
+; CHECK-NEXT: [[IDXPROM_PROL:%.*]] = sext i32 [[ADD1_I_PROL]] to i64
+; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_PROL]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_PROL:%.*]] = load float, ptr [[ARRAYIDX_PROL]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_PROL]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_PROL]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_PROL]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_PROL:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_PROL]], align 4, !tbaa [[CHAR_TBAA8:![0-9]+]]
+; CHECK-NEXT: [[ADD_PROL]] = fadd float [[TMP13]], [[NEXT_SROA_0_0_COPYLOAD_PROL]]
+; CHECK-NEXT: store float [[ADD_PROL]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_PROL]] = fadd float [[TMP12]], [[NEXT_SROA_4_0_COPYLOAD_PROL]]
+; CHECK-NEXT: store float [[ADD_1_PROL]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_PROL]] = fadd float [[TMP11]], [[NEXT_SROA_5_0_COPYLOAD_PROL]]
+; CHECK-NEXT: store float [[ADD_2_PROL]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_PROL]] = fadd float [[TMP10]], [[NEXT_SROA_6_0_COPYLOAD_PROL]]
+; CHECK-NEXT: store float [[ADD_3_PROL]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], [[TMP1]]
+; CHECK-NEXT: [[CMP_PROL:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], [[TMP2]]
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTUNR_PH:%.*]] = phi float [ [[ADD_3_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[DOTUNR1_PH:%.*]] = phi float [ [[ADD_2_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[DOTUNR2_PH:%.*]] = phi float [ [[ADD_1_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[DOTUNR3_PH:%.*]] = phi float [ [[ADD_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT]]
+; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[DOTUNR:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[DOTUNR1:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[DOTUNR2:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR2_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[DOTUNR3:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR3_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_UNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP8]], 7
+; CHECK-NEXT: br i1 [[TMP15]], label %[[FOR_END14_LOOPEXIT:.*]], label %[[FOR_BODY_LR_PH_NEW:.*]]
+; CHECK: [[FOR_BODY_LR_PH_NEW]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP16:%.*]] = phi float [ [[DOTUNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi float [ [[DOTUNR1]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_2_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = phi float [ [[DOTUNR2]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_1_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi float [ [[DOTUNR3]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP20]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP19]], [[NEXT_SROA_0_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[TMP18]], [[NEXT_SROA_4_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[TMP17]], [[NEXT_SROA_5_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[TMP16]], [[NEXT_SROA_6_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT: [[TMP21:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[ADD1_I_1:%.*]] = add i32 [[ADD_I]], [[TMP21]]
+; CHECK-NEXT: [[IDXPROM_1:%.*]] = sext i32 [[ADD1_I_1]] to i64
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_1]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_1]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_1]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_1]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_1:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_1]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_14:%.*]] = fadd float [[ADD]], [[NEXT_SROA_0_0_COPYLOAD_1]]
+; CHECK-NEXT: store float [[ADD_14]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_1:%.*]] = fadd float [[ADD_1]], [[NEXT_SROA_4_0_COPYLOAD_1]]
+; CHECK-NEXT: store float [[ADD_1_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_1:%.*]] = fadd float [[ADD_2]], [[NEXT_SROA_5_0_COPYLOAD_1]]
+; CHECK-NEXT: store float [[ADD_2_1]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_1:%.*]] = fadd float [[ADD_3]], [[NEXT_SROA_6_0_COPYLOAD_1]]
+; CHECK-NEXT: store float [[ADD_3_1]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT: [[TMP22:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_1]] to i32
+; CHECK-NEXT: [[ADD1_I_2:%.*]] = add i32 [[ADD_I]], [[TMP22]]
+; CHECK-NEXT: [[IDXPROM_2:%.*]] = sext i32 [[ADD1_I_2]] to i64
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_2]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_2]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_2]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_2]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_2:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_2]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_25:%.*]] = fadd float [[ADD_14]], [[NEXT_SROA_0_0_COPYLOAD_2]]
+; CHECK-NEXT: store float [[ADD_25]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_2:%.*]] = fadd float [[ADD_1_1]], [[NEXT_SROA_4_0_COPYLOAD_2]]
+; CHECK-NEXT: store float [[ADD_1_2]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_2:%.*]] = fadd float [[ADD_2_1]], [[NEXT_SROA_5_0_COPYLOAD_2]]
+; CHECK-NEXT: store float [[ADD_2_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_2:%.*]] = fadd float [[ADD_3_1]], [[NEXT_SROA_6_0_COPYLOAD_2]]
+; CHECK-NEXT: store float [[ADD_3_2]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[TMP1]]
+; CHECK-NEXT: [[TMP23:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_2]] to i32
+; CHECK-NEXT: [[ADD1_I_3:%.*]] = add i32 [[ADD_I]], [[TMP23]]
+; CHECK-NEXT: [[IDXPROM_3:%.*]] = sext i32 [[ADD1_I_3]] to i64
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_3]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_3]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_3]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_3]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_3:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_3]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_36:%.*]] = fadd float [[ADD_25]], [[NEXT_SROA_0_0_COPYLOAD_3]]
+; CHECK-NEXT: store float [[ADD_36]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_3:%.*]] = fadd float [[ADD_1_2]], [[NEXT_SROA_4_0_COPYLOAD_3]]
+; CHECK-NEXT: store float [[ADD_1_3]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_3:%.*]] = fadd float [[ADD_2_2]], [[NEXT_SROA_5_0_COPYLOAD_3]]
+; CHECK-NEXT: store float [[ADD_2_3]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_3:%.*]] = fadd float [[ADD_3_2]], [[NEXT_SROA_6_0_COPYLOAD_3]]
+; CHECK-NEXT: store float [[ADD_3_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[TMP1]]
+; CHECK-NEXT: [[TMP24:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_3]] to i32
+; CHECK-NEXT: [[ADD1_I_4:%.*]] = add i32 [[ADD_I]], [[TMP24]]
+; CHECK-NEXT: [[IDXPROM_4:%.*]] = sext i32 [[ADD1_I_4]] to i64
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_4]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_4]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_4]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_4]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_4:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_4]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[ADD_36]], [[NEXT_SROA_0_0_COPYLOAD_4]]
+; CHECK-NEXT: store float [[ADD_4]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_4:%.*]] = fadd float [[ADD_1_3]], [[NEXT_SROA_4_0_COPYLOAD_4]]
+; CHECK-NEXT: store float [[ADD_1_4]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_4:%.*]] = fadd float [[ADD_2_3]], [[NEXT_SROA_5_0_COPYLOAD_4]]
+; CHECK-NEXT: store float [[ADD_2_4]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_4:%.*]] = fadd float [[ADD_3_3]], [[NEXT_SROA_6_0_COPYLOAD_4]]
+; CHECK-NEXT: store float [[ADD_3_4]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[TMP1]]
+; CHECK-NEXT: [[TMP25:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_4]] to i32
+; CHECK-NEXT: [[ADD1_I_5:%.*]] = add i32 [[ADD_I]], [[TMP25]]
+; CHECK-NEXT: [[IDXPROM_5:%.*]] = sext i32 [[ADD1_I_5]] to i64
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_5]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_5]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_5]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_5]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_5:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_5]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_5:%.*]] = fadd float [[ADD_4]], [[NEXT_SROA_0_0_COPYLOAD_5]]
+; CHECK-NEXT: store float [[ADD_5]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_5:%.*]] = fadd float [[ADD_1_4]], [[NEXT_SROA_4_0_COPYLOAD_5]]
+; CHECK-NEXT: store float [[ADD_1_5]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_5:%.*]] = fadd float [[ADD_2_4]], [[NEXT_SROA_5_0_COPYLOAD_5]]
+; CHECK-NEXT: store float [[ADD_2_5]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_5:%.*]] = fadd float [[ADD_3_4]], [[NEXT_SROA_6_0_COPYLOAD_5]]
+; CHECK-NEXT: store float [[ADD_3_5]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[TMP1]]
+; CHECK-NEXT: [[TMP26:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_5]] to i32
+; CHECK-NEXT: [[ADD1_I_6:%.*]] = add i32 [[ADD_I]], [[TMP26]]
+; CHECK-NEXT: [[IDXPROM_6:%.*]] = sext i32 [[ADD1_I_6]] to i64
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_6]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_6]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_6]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_6]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_6:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_6]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_6:%.*]] = fadd float [[ADD_5]], [[NEXT_SROA_0_0_COPYLOAD_6]]
+; CHECK-NEXT: store float [[ADD_6]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_6:%.*]] = fadd float [[ADD_1_5]], [[NEXT_SROA_4_0_COPYLOAD_6]]
+; CHECK-NEXT: store float [[ADD_1_6]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_6:%.*]] = fadd float [[ADD_2_5]], [[NEXT_SROA_5_0_COPYLOAD_6]]
+; CHECK-NEXT: store float [[ADD_2_6]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_6:%.*]] = fadd float [[ADD_3_5]], [[NEXT_SROA_6_0_COPYLOAD_6]]
+; CHECK-NEXT: store float [[ADD_3_6]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[TMP1]]
+; CHECK-NEXT: [[TMP27:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_6]] to i32
+; CHECK-NEXT: [[ADD1_I_7:%.*]] = add i32 [[ADD_I]], [[TMP27]]
+; CHECK-NEXT: [[IDXPROM_7:%.*]] = sext i32 [[ADD1_I_7]] to i64
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM_7]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX_7]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX_7]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_7]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD_7:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX_7]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD_7]] = fadd float [[ADD_6]], [[NEXT_SROA_0_0_COPYLOAD_7]]
+; CHECK-NEXT: store float [[ADD_7]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1_7]] = fadd float [[ADD_1_6]], [[NEXT_SROA_4_0_COPYLOAD_7]]
+; CHECK-NEXT: store float [[ADD_1_7]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2_7]] = fadd float [[ADD_2_6]], [[NEXT_SROA_5_0_COPYLOAD_7]]
+; CHECK-NEXT: store float [[ADD_2_7]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3_7]] = fadd float [[ADD_3_6]], [[NEXT_SROA_6_0_COPYLOAD_7]]
+; CHECK-NEXT: store float [[ADD_3_7]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[TMP1]]
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_7]], [[TMP2]]
+; CHECK-NEXT: br i1 [[CMP_7]], label %[[FOR_BODY]], label %[[FOR_END14_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[FOR_END14_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END14_LOOPEXIT]]
+; CHECK: [[FOR_END14_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END14]]
+; CHECK: [[FOR_END14]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp23 = icmp slt i32 %input_offset, %n
+ br i1 %cmp23, label %for.body.lr.ph, label %for.end14
+
+for.body.lr.ph: ; preds = %entry
+ %add.i = add i32 %off2, %off1
+ %0 = sext i32 %input_offset to i64
+ %1 = sext i32 %step to i64
+ %2 = sext i32 %n to i64
+ %.pre = load float, ptr %value, align 4, !tbaa !6
+ %arrayidx5.1.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 4
+ %.pre27 = load float, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6
+ %arrayidx5.2.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 8
+ %.pre28 = load float, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6
+ %arrayidx5.3.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 12
+ %.pre29 = load float, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %3 = phi float [ %.pre29, %for.body.lr.ph ], [ %add.3, %for.body ]
+ %4 = phi float [ %.pre28, %for.body.lr.ph ], [ %add.2, %for.body ]
+ %5 = phi float [ %.pre27, %for.body.lr.ph ], [ %add.1, %for.body ]
+ %6 = phi float [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %7 = trunc nsw i64 %indvars.iv to i32
+ %add1.i = add i32 %add.i, %7
+ %idxprom = sext i32 %add1.i to i64
+ %arrayidx = getelementptr inbounds %struct.ArgVec, ptr %reduce_buffer, i64 %idxprom
+ %next.sroa.0.0.copyload = load float, ptr %arrayidx, align 4
+ %next.sroa.4.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4
+ %next.sroa.4.0.copyload = load float, ptr %next.sroa.4.0.arrayidx.sroa_idx, align 4
+ %next.sroa.5.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8
+ %next.sroa.5.0.copyload = load float, ptr %next.sroa.5.0.arrayidx.sroa_idx, align 4
+ %next.sroa.6.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12
+ %next.sroa.6.0.copyload = load float, ptr %next.sroa.6.0.arrayidx.sroa_idx, align 4, !tbaa !8
+ %add = fadd float %6, %next.sroa.0.0.copyload
+ store float %add, ptr %value, align 4, !tbaa !6
+ %add.1 = fadd float %5, %next.sroa.4.0.copyload
+ store float %add.1, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6
+ %add.2 = fadd float %4, %next.sroa.5.0.copyload
+ store float %add.2, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6
+ %add.3 = fadd float %3, %next.sroa.6.0.copyload
+ store float %add.3, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6
+ %indvars.iv.next = add nsw i64 %indvars.iv, %1
+ %cmp = icmp slt i64 %indvars.iv.next, %2
+ br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !9
+
+for.end14: ; preds = %for.body, %entry
+ ret void
+}
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @complex_loop_nounroll(i32 noundef %input_offset, i32 noundef %step, i32 noundef %n, i32 noundef %off1, i32 noundef %off2, ptr noundef readonly captures(none) %reduce_buffer, ptr noundef captures(none) %value) local_unnamed_addr #1 {
+; CHECK-LABEL: define dso_local void @complex_loop_nounroll(
+; CHECK-SAME: i32 noundef [[INPUT_OFFSET:%.*]], i32 noundef [[STEP:%.*]], i32 noundef [[N:%.*]], i32 noundef [[OFF1:%.*]], i32 noundef [[OFF2:%.*]], ptr noundef readonly captures(none) [[REDUCE_BUFFER:%.*]], ptr noundef captures(none) [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[INPUT_OFFSET]], [[N]]
+; CHECK-NEXT: br i1 [[CMP23]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END14:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[OFF2]], [[OFF1]]
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INPUT_OFFSET]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STEP]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT: [[DOTPRE:%.*]] = load float, ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ARRAYIDX5_1_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 4
+; CHECK-NEXT: [[DOTPRE27:%.*]] = load float, ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ARRAYIDX5_2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 8
+; CHECK-NEXT: [[DOTPRE28:%.*]] = load float, ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ARRAYIDX5_3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds nuw i8, ptr [[VALUE]], i64 12
+; CHECK-NEXT: [[DOTPRE29:%.*]] = load float, ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[DOTPRE29]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[DOTPRE28]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_2:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE27]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_1:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi float [ [[DOTPRE]], %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[ADD1_I:%.*]] = add i32 [[ADD_I]], [[TMP7]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD1_I]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ARGVEC:%.*]], ptr [[REDUCE_BUFFER]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[NEXT_SROA_0_0_COPYLOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
+; CHECK-NEXT: [[NEXT_SROA_4_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_4_0_ARRAYIDX_SROA_IDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
+; CHECK-NEXT: [[NEXT_SROA_5_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_5_0_ARRAYIDX_SROA_IDX]], align 4
+; CHECK-NEXT: [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
+; CHECK-NEXT: [[NEXT_SROA_6_0_COPYLOAD:%.*]] = load float, ptr [[NEXT_SROA_6_0_ARRAYIDX_SROA_IDX]], align 4, !tbaa [[CHAR_TBAA8]]
+; CHECK-NEXT: [[ADD]] = fadd float [[TMP6]], [[NEXT_SROA_0_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD]], ptr [[VALUE]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_1]] = fadd float [[TMP5]], [[NEXT_SROA_4_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_1]], ptr [[ARRAYIDX5_1_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_2]] = fadd float [[TMP4]], [[NEXT_SROA_5_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_2]], ptr [[ARRAYIDX5_2_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[ADD_3]] = fadd float [[TMP3]], [[NEXT_SROA_6_0_COPYLOAD]]
+; CHECK-NEXT: store float [[ADD_3]], ptr [[ARRAYIDX5_3_PHI_TRANS_INSERT]], align 4, !tbaa [[FLOAT_TBAA6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END14_LOOPEXIT:.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END14_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END14]]
+; CHECK: [[FOR_END14]]:
+; CHECK-NEXT: ret void
+;
+entry: ; Negative case: the loop below is tagged with !13 (llvm.loop.unroll.enable, no llvm.loop.unroll.runtime.force) and the assertions above expect a single, non-unrolled for.body. Presumably unrolling is declined because the trip count for a runtime stride is treated as expensive - confirm against LoopUnrollPass.
+ %cmp23 = icmp slt i32 %input_offset, %n ; guard: the loop is skipped entirely unless input_offset < n
+ br i1 %cmp23, label %for.body.lr.ph, label %for.end14
+
+for.body.lr.ph: ; preds = %entry
+ %add.i = add i32 %off2, %off1 ; loop-invariant part of the element index
+ %0 = sext i32 %input_offset to i64 ; initial induction value
+ %1 = sext i32 %step to i64 ; loop stride; a function argument, so not a compile-time constant here
+ %2 = sext i32 %n to i64 ; exclusive upper bound for the induction variable
+ %.pre = load float, ptr %value, align 4, !tbaa !6 ; the four floats at byte offsets 0, 4, 8 and 12 of %value seed the accumulator phis
+ %arrayidx5.1.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 4
+ %.pre27 = load float, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6
+ %arrayidx5.2.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 8
+ %.pre28 = load float, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6
+ %arrayidx5.3.phi.trans.insert = getelementptr inbounds nuw i8, ptr %value, i64 12
+ %.pre29 = load float, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %3 = phi float [ %.pre29, %for.body.lr.ph ], [ %add.3, %for.body ]
+ %4 = phi float [ %.pre28, %for.body.lr.ph ], [ %add.2, %for.body ]
+ %5 = phi float [ %.pre27, %for.body.lr.ph ], [ %add.1, %for.body ]
+ %6 = phi float [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %7 = trunc nsw i64 %indvars.iv to i32 ; element index = (off2 + off1) + iv, formed in i32 and then sign-extended
+ %add1.i = add i32 %add.i, %7
+ %idxprom = sext i32 %add1.i to i64
+ %arrayidx = getelementptr inbounds %struct.ArgVec, ptr %reduce_buffer, i64 %idxprom
+ %next.sroa.0.0.copyload = load float, ptr %arrayidx, align 4 ; read four floats at byte offsets 0, 4, 8 and 12 of the selected element
+ %next.sroa.4.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4
+ %next.sroa.4.0.copyload = load float, ptr %next.sroa.4.0.arrayidx.sroa_idx, align 4
+ %next.sroa.5.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 8
+ %next.sroa.5.0.copyload = load float, ptr %next.sroa.5.0.arrayidx.sroa_idx, align 4
+ %next.sroa.6.0.arrayidx.sroa_idx = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12
+ %next.sroa.6.0.copyload = load float, ptr %next.sroa.6.0.arrayidx.sroa_idx, align 4, !tbaa !8
+ %add = fadd float %6, %next.sroa.0.0.copyload ; each loaded float is added to its running sum, which is stored back to %value every iteration
+ store float %add, ptr %value, align 4, !tbaa !6
+ %add.1 = fadd float %5, %next.sroa.4.0.copyload
+ store float %add.1, ptr %arrayidx5.1.phi.trans.insert, align 4, !tbaa !6
+ %add.2 = fadd float %4, %next.sroa.5.0.copyload
+ store float %add.2, ptr %arrayidx5.2.phi.trans.insert, align 4, !tbaa !6
+ %add.3 = fadd float %3, %next.sroa.6.0.copyload
+ store float %add.3, ptr %arrayidx5.3.phi.trans.insert, align 4, !tbaa !6
+ %indvars.iv.next = add nsw i64 %indvars.iv, %1 ; advance by the sign-extended %step
+ %cmp = icmp slt i64 %indvars.iv.next, %2
+ br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !13 ; !13 requests unrolling but omits the runtime.force hint
+
+for.end14: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+attributes #1 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!llvm.errno.tbaa = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 23.0.0git (https://github.com/adelejjeh/llvm-project 5e0e389360d569e5b3918e61a615d52328649533)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0} ; TBAA access tag for float, used on the accumulator loads and stores
+!7 = !{!"float", !4, i64 0}
+!8 = !{!4, !4, i64 0} ; TBAA access tag for omnipotent char
+!9 = distinct !{!9, !10, !11, !12} ; loop ID carrying both unroll.enable and unroll.runtime.force
+!10 = !{!"llvm.loop.mustprogress"}
+!11 = !{!"llvm.loop.unroll.enable"}
+!12 = !{!"llvm.loop.unroll.runtime.force"}
+!13 = distinct !{!13, !10, !11} ; loop ID used by @complex_loop_nounroll: unroll.enable only, no runtime.force
+;.
+; CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[FLOAT_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+; CHECK: [[META7]] = !{!"float", [[META4]], i64 0}
+; CHECK: [[CHAR_TBAA8]] = !{[[META4]], [[META4]], i64 0}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
+; CHECK: [[META10]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]], [[META10]]}
+; CHECK: [[META12]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META12]], [[META14:![0-9]+]]}
+; CHECK: [[META14]] = !{!"llvm.loop.unroll.enable"}
+;.
More information about the cfe-commits
mailing list