[llvm] [LoopUnroll] Simplify reduction operations after a loop unroll (PR #84805)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 12 02:54:24 PDT 2024
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/84805
>From f786d8486fe27cae3e5537ddfb2cf7f8196483d7 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at ed.ac.uk>
Date: Mon, 11 Mar 2024 17:39:20 +0000
Subject: [PATCH 1/3] [LoopUnroll][NFC] Add tests for reduction deinterleaving
---
llvm/test/CodeGen/AArch64/polybench-3mm.ll | 53 ++++
.../LoopUnroll/simplify-reductions.ll | 295 ++++++++++++++++++
2 files changed, 348 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/polybench-3mm.ll
create mode 100644 llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
diff --git a/llvm/test/CodeGen/AArch64/polybench-3mm.ll b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
new file mode 100644
index 00000000000000..034d7f44a95f41
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: opt -passes=loop-unroll,instcombine -unroll-count=2 %s | llc --mattr=,+neon | FileCheck %s
+
+target triple = "aarch64"
+
+; This is a reduced example adapted from the Polybench 3MM kernel.
+; We are doing something similar to:
+; double dot = 0.0;
+; for (long k = 0; k < 1000; k++)
+; dot += A[k] * B[k*nb];
+; return dot;
+
+define double @test(ptr %A, ptr %B, i64 %nb) {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: lsl x8, x2, #4
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: .LBB0_1: // %loop
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x10, x0, x9, lsl #3
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: add x9, x9, #2
+; CHECK-NEXT: cmp x9, #1000
+; CHECK-NEXT: ldp d2, d3, [x10]
+; CHECK-NEXT: fmadd d0, d1, d2, d0
+; CHECK-NEXT: ldr d1, [x1, x2, lsl #3]
+; CHECK-NEXT: add x1, x1, x8
+; CHECK-NEXT: fmadd d0, d1, d3, d0
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+loop:
+ %k = phi i64 [ %k.next, %loop ], [ 0, %entry ]
+ %dot = phi double [ %dot.next, %loop ], [ 0.000000e+00, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %k
+ %A.val = load double, ptr %A.gep, align 8
+ %B.idx = mul nsw i64 %k, %nb
+ %B.gep = getelementptr inbounds double, ptr %B, i64 %B.idx
+ %B.val = load double, ptr %B.gep, align 8
+ %fmul = fmul fast double %B.val, %A.val
+ %dot.next = fadd fast double %fmul, %dot
+ %k.next = add nuw nsw i64 %k, 1
+ %cmp = icmp eq i64 %k.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %dot.next.lcssa = phi double [ %dot.next, %loop ]
+ ret double %dot.next.lcssa
+}
diff --git a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
new file mode 100644
index 00000000000000..2d4dd76a6cab37
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=loop-unroll -unroll-count=2 < %s | FileCheck %s
+
+; The loops below are variations of:
+; double sum = 0;
+; for(long i = 0; i < 1000; i++)
+; sum += A[i];
+; return sum;
+
+; Positive test, simple case.
+define double @p1(ptr %A) {
+; CHECK-LABEL: @p1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret double [[SUM_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+ %A.val = load double, ptr %A.gep, align 8
+ %sum.next = fadd fast double %A.val, %sum
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %sum.lcssa = phi double [ %sum.next, %loop ]
+ ret double %sum.lcssa
+}
+
+; Positive test, non-zero starting sum.
+define double @p2(ptr %A, double %acc) {
+; CHECK-LABEL: @p2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret double [[SUM_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %sum = phi double [ %sum.next, %loop ], [ %acc, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+ %A.val = load double, ptr %A.gep, align 8
+ %sum.next = fadd fast double %A.val, %sum
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %sum.lcssa = phi double [ %sum.next, %loop ]
+ ret double %sum.lcssa
+}
+
+; Positive test, non-floating-point type.
+define i64 @p3(ptr %A) {
+; CHECK-LABEL: @p3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[AND_NEXT:%.*]] = and i64 [[A_VAL]], [[AND]]
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND_NEXT]]
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[AND_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %and = phi i64 [ %and.next, %loop ], [ -1, %entry ]
+ %A.gep = getelementptr inbounds i64, ptr %A, i64 %i
+ %A.val = load i64, ptr %A.gep, align 8
+ %and.next = and i64 %A.val, %and
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %and.lcssa = phi i64 [ %and.next, %loop ]
+ ret i64 %and.lcssa
+}
+
+; Negative test: Sum is used in the loop, which prevents breaking the
+; reduction.
+define double @n1(ptr %A) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: store double [[SUM_NEXT]], ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: store double [[SUM_NEXT_1]], ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret double [[SUM_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+ %A.val = load double, ptr %A.gep, align 8
+ %sum.next = fadd fast double %A.val, %sum
+ store double %sum.next, ptr %A.gep
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %sum.lcssa = phi double [ %sum.next, %loop ]
+ ret double %sum.lcssa
+}
+
+; Negative test: Reduction op is not reassociable (fadd has no fast-math flags).
+define double @n2(ptr %A) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret double [[SUM_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+ %A.val = load double, ptr %A.gep, align 8
+ %sum.next = fadd double %A.val, %sum
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %sum.lcssa = phi double [ %sum.next, %loop ]
+ ret double %sum.lcssa
+}
+
+; Negative test: Reduction op is unsupported.
+define double @n3(ptr %A) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[PROD:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[PROD_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[PROD_NEXT:%.*]] = fmul fast double [[A_VAL]], [[PROD]]
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[PROD_NEXT_1]] = fmul fast double [[A_VAL_1]], [[PROD_NEXT]]
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[PROD_LCSSA:%.*]] = phi double [ [[PROD_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret double [[PROD_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %prod = phi double [ %prod.next, %loop ], [ 1.000000e+00, %entry ]
+ %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+ %A.val = load double, ptr %A.gep, align 8
+ %prod.next = fmul fast double %A.val, %prod
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %prod.lcssa = phi double [ %prod.next, %loop ]
+ ret double %prod.lcssa
+}
+
+; Negative test: Chain of different operations.
+define i64 @n4(ptr %A) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RED_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
+; CHECK-NEXT: [[RED_TEMP:%.*]] = add i64 [[A_VAL]], [[RED]]
+; CHECK-NEXT: [[RED_NEXT:%.*]] = and i64 [[RED_TEMP]], 7
+; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT: [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT: [[RED_TEMP_1:%.*]] = add i64 [[A_VAL_1]], [[RED_NEXT]]
+; CHECK-NEXT: [[RED_NEXT_1]] = and i64 [[RED_TEMP_1]], 7
+; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i64 [ [[RED_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[RED_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %red = phi i64 [ %red.next, %loop ], [ 0, %entry ]
+ %A.gep = getelementptr inbounds i64, ptr %A, i64 %i
+ %A.val = load i64, ptr %A.gep, align 8
+ %red.temp = add i64 %A.val, %red
+ %red.next = and i64 %red.temp, 7
+ %i.next = add nuw nsw i64 %i, 1
+ %cmp = icmp eq i64 %i.next, 1000
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %red.lcssa = phi i64 [ %red.next, %loop ]
+ ret i64 %red.lcssa
+}
>From 1ec52ec6c2e1e4edefd0b2a02fad005655c1303e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at ed.ac.uk>
Date: Mon, 11 Mar 2024 15:56:08 +0000
Subject: [PATCH 2/3] [LoopUnroll] Simplify reduction operations after a loop
unroll
Try to simplify reductions (e.g. chains of floating-point adds) into
independent operations after unrolling a loop. This is a very common
pattern in unrolled loops that compute dot products (for example) and
can help with vectorisation.
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 267 +++++++++
llvm/test/CodeGen/AArch64/polybench-3mm.ll | 14 +-
.../LoopUnroll/AArch64/falkor-prefetch.ll | 20 +
.../LoopUnroll/ARM/instr-size-costs.ll | 20 +-
llvm/test/Transforms/LoopUnroll/X86/znver3.ll | 550 +++++++++++++-----
.../Transforms/LoopUnroll/runtime-loop5.ll | 20 +-
.../LoopUnroll/runtime-unroll-remainder.ll | 20 +-
.../LoopUnroll/simplify-reductions.ll | 32 +-
.../PhaseOrdering/SystemZ/sub-xor.ll | 163 ++++--
9 files changed, 869 insertions(+), 237 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 6f0d000815726e..b14d05d642e275 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -84,6 +84,11 @@ STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
"latch (completely or otherwise)");
+static cl::opt<bool>
+UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
+ cl::Hidden, cl::desc("Try to simplify reductions "
+ "after unrolling a loop."));
+
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
@@ -209,6 +214,258 @@ static bool isEpilogProfitable(Loop *L) {
return false;
}
+/// This function tries to break apart simple reduction loops like the one
+/// below:
+///
+/// loop:
+/// PN = PHI [SUM2, loop], ...
+/// X = ...
+/// SUM1 = ADD (X, PN)
+/// Y = ...
+/// SUM2 = ADD (Y, SUM1)
+/// br loop
+///
+/// into independent sums of the form:
+///
+/// loop:
+/// PN1 = PHI [SUM1, loop], ...
+/// PN2 = PHI [SUM2, loop], ...
+/// X = ...
+/// SUM1 = ADD (X, PN1)
+/// Y = ...
+/// SUM2 = ADD (Y, PN2)
+/// <Reductions>
+/// br loop
+///
+/// where <Reductions> are new instructions inserted to compute the final
+/// values of the reduction from the partial sums we introduced, in this case:
+///
+/// <Reductions> =
+/// PN.red = ADD (PN1, PN2)
+/// SUM1.red = ADD (SUM1, PN2)
+/// SUM2.red = ADD (SUM1, SUM2)
+///
+/// In practice in most cases only one or two of the reduced values are
+/// required outside the loop so most of the reduction instructions do not
+/// need to be added into the loop. Moreover, these instructions can be sunk
+/// out of the loop, which happens in later passes.
+///
+/// This is a very common pattern in unrolled loops that compute dot products
+/// (for example) and breaking apart the reduction chains can help greatly with
+/// vectorisation.
+static bool trySimplifyReductions(Instruction &I) {
+ // Check if I is a PHINode (potentially the start of a reduction chain).
+ // Note: For simplicity we only consider loops that consist of a single
+ // basic block that branches to itself.
+ BasicBlock *BB = I.getParent();
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN || PN->getBasicBlockIndex(BB) == -1)
+ return false;
+
+ // Attempt to construct a list of instructions that are chained together
+ // (i.e. that perform a reduction).
+ SmallVector<BinaryOperator *, 16> Ops;
+ for (Instruction *Cur = PN, *Next = nullptr; /* true */; Cur = Next,
+ Next = nullptr) {
+ // Try to find the next element in the reduction chain.
+ for (auto *U : Cur->users()) {
+ auto *Candidate = dyn_cast<Instruction>(U);
+ if (Candidate && Candidate->getParent() == BB) {
+ // If we've already found a candidate element for the chain and we find
+ // *another* candidate we bail out as this means the intermediate
+ // values of the reduction are needed within the loop, and so there is
+ // no point in breaking the reduction apart.
+ if (Next)
+ return false;
+ Next = Candidate;
+ }
+ }
+ // If we've reached the start, i.e. the next element in the chain would be
+ // the PN we started with, we are done.
+ if (Next == PN)
+ break;
+ // Else, check if we found a candidate at all and if so if it is a binary
+ // operator.
+ if (!Next || !isa<BinaryOperator>(Next))
+ return false;
+ // If everything checks out, add the new element to the chain.
+ Ops.push_back(cast<BinaryOperator>(Next));
+ }
+
+ // Ensure the reduction comprises at least two instructions, otherwise this
+ // is a trivial reduction of a single element that doesn't need to be
+ // simplified.
+ if (Ops.size() < 2)
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "Found candidate reduction: " << I << "\n";
+ for (auto const *Op : Ops)
+ dbgs() << " | " << *Op << "\n";
+ );
+
+ // Ensure all instructions perform the same operation and that the operation
+ // is associative and commutative so that we can break the chain apart and
+ // reassociate the Ops.
+ Instruction::BinaryOps const Opcode = Ops[0]->getOpcode();
+ for (auto const *Op : Ops)
+ if (Op->getOpcode() != Opcode || !Op->isAssociative() ||
+ !Op->isCommutative())
+ return false;
+
+ // Define the neutral element of the reduction or bail out if we don't have
+ // one defined.
+ // TODO: This could be generalised to other operations (e.g. MUL's).
+ Value *NeutralElem = nullptr;
+ switch (Opcode) {
+ case Instruction::BinaryOps::Add:
+ case Instruction::BinaryOps::Or:
+ case Instruction::BinaryOps::Xor:
+ case Instruction::BinaryOps::FAdd:
+ NeutralElem = Constant::getNullValue(PN->getType());
+ break;
+ case Instruction::BinaryOps::And:
+ NeutralElem = Constant::getAllOnesValue(PN->getType());
+ break;
+ case Instruction::BinaryOps::Mul:
+ case Instruction::BinaryOps::FMul:
+ default:
+ return false;
+ }
+ assert(NeutralElem && "Neutral element of reduction undefined.");
+
+ // --------------------------------------------------------------------- //
+ // At this point Ops is a list of chained binary operations performing a //
+ // reduction that we know we can break apart. //
+ // --------------------------------------------------------------------- //
+
+ // For shorthand, let N be the length of the chain.
+ unsigned const N = Ops.size();
+ LLVM_DEBUG(dbgs() << "Simplifying reduction of length " << N << ".\n");
+
+ // Create new phi nodes for all but the first element in the chain.
+ SmallVector<PHINode *, 16> Phis{PN};
+ for (unsigned i = 1; i < N; i++) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getNumIncomingValues(),
+ PN->getName());
+ // Copy incoming blocks from the first/original PN to the new Phi and set
+ // their incoming values to the neutral element of the reduction.
+ for (auto *IncomingBB : PN->blocks())
+ NewPN->addIncoming(NeutralElem, IncomingBB);
+ NewPN->insertAfter(Phis.back());
+ Phis.push_back(NewPN);
+ }
+
+ // Set the chained operands of the Ops to the Phis and the incoming values of
+ // the Phis (for this BB) to the Ops.
+ for (unsigned i = 0; i < N; i++) {
+ PHINode *Phi = Phis[i];
+ Instruction *Op = Ops[i];
+
+ // Find the index of the operand of Op to replace. The first Op reads its
+ // value from the first Phi node. The other Ops read their value from the
+ // previous Op.
+ Value *OperandToReplace = i == 0 ? cast<Value>(PN) : Ops[i-1];
+ unsigned OperandIdx = Op->getOperand(0) == OperandToReplace ? 0 : 1;
+ assert(Op->getOperand(OperandIdx) == OperandToReplace &&
+ "Operand mismatch. Perhaps a malformed chain?");
+
+ // Set the operand of Op to Phi and the incoming value of Phi for BB to Op.
+ Op->setOperand(OperandIdx, Phi);
+ Phi->setIncomingValueForBlock(BB, Op);
+ }
+
+ // Replace old uses of PN and Ops outside this BB with the updated totals.
+ // The "old" total corresponding to PN now corresponds to the sum of all
+ // Phis. Similarly, the old totals in Ops correspond to the sum of the
+ // partial results in the new Ops up to the index of the Op we want to
+ // compute, plus the sum of the Phis from that index onwards.
+ //
+ // More rigorously, the totals can be computed as follows.
+ // 1. Let k be an index in the list of length N+1 below of the variables we
+ // want to compute the new totals for:
+ // { PN, Ops[0], Ops[1], ... }
+ // 2. Let Sum(k) denote the new total to compute for the k-th variable in the
+ // list above. Then,
+ // Sum(0) = Sum(PN) = \sum_{0 <= i < N} Phis[i],
+ // Sum(1) = Sum(Ops[0]) = \sum_{0 <= i < 1} Ops[i] +
+ // \sum_{1 <= i < N} Phis[i],
+ // ...
+ // Sum(N) = Sum(Ops[N-1]) = \sum_{0 <= i < N} Ops[i].
+ // 3. More generally,
+ // Sum(k) = Sum(PN) if k == 0 else Sum(Ops[k-1])
+ // = \sum_{0 <= i < k} Ops[i] +
+ // \sum_{k <= i < N} Phis[i],
+ // for 0 <= k <= N.
+ // 4. Finally, if we name the sums in Ops and Phis separately, i.e.
+ // SOps(k) = \sum_{0 <= i < k} Ops[i],
+ // SPhis(k) = \sum_{k <= i < N} Phis[i],
+ // then
+ // Sum(k) = SOps(k) + SPhis(k), 0 <= k <= N.
+ // .
+
+ // Helper function to create a new binary op.
+ // Note: We copy the flags from Ops[0]. Could this be too permissive?
+ auto CreateBinOp = [&](Value *V1, Value *V2) {
+ auto Name = PN->getName()+".red";
+ return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0],
+ Name, &BB->back());
+ };
+
+ // Compute the partial sums of the Ops:
+ // SOps[k] = \sum_{0 <= i < k} Ops[i], 0 <= k <= N.
+ // For 1 <= k <= N we have:
+ // SOps[k] = Ops[k-1] + \sum_{0 <= i < k-1} Ops[i]
+ // = Ops[k-1] + SOps[k-1],
+ // so if we compute SOps in order (i.e. from 0 to N) we can reuse partial
+ // results.
+ SmallVector<Value *, 16> SOps(N+1);
+ SOps[0] = nullptr; // alternatively we could use NeutralElem
+ SOps[1] = Ops.front();
+ for (unsigned k = 2; k <= N; k++)
+ SOps[k] = CreateBinOp(SOps[k-1], Ops[k-1]);
+
+ // Compute the partial sums of the Phis:
+ // SPhis[k] = \sum_{k <= i < N} Phis[i], 0 <= k <= N.
+ // Similarly, for 0 <= k <= N-1 we have:
+ // SPhis[k] = Phis[k] + \sum_{k+1 <= i < N} Phis[i]
+ // = Phis[k] + SPhis[k+1],
+ // so if we compute SPhis in reverse (i.e. from N down to 0) we can reuse the
+ // partial sums computed thus far.
+ SmallVector<Value *, 16> SPhis(N+1);
+ SPhis[N] = nullptr; // alternatively we could use NeutralElem
+ SPhis[N-1] = Phis.back();
+ for (signed k = N-2; k >= 0; k--)
+ SPhis[k] = CreateBinOp(SPhis[k+1], Phis[k]);
+
+ // Finally, compute the total sums for PN and Ops from:
+ // Sums[k] = SOps[k] + SPhis[k], 0 <= k <= N.
+ // These sums might be dead so we add them to a weak tracking vector for
+ // cleanup afterwards.
+ SmallVector<WeakTrackingVH, 16> Sums(N+1);
+ for (unsigned k = 0; k <= N; k++) {
+ // Pick the Op we want to compute the new total for.
+ Value *Op = k == 0 ? cast<Value>(PN) : Ops[k-1];
+
+ Value *SOp = SOps[k], *SPhi = SPhis[k];
+ if (SOp && SPhi)
+ Sums[k] = CreateBinOp(SOp, SPhi);
+ else if (SOp)
+ Sums[k] = SOp;
+ else
+ Sums[k] = SPhi;
+
+ // Replace uses of the old total with the new total.
+ Op->replaceUsesOutsideBlock(Sums[k], BB);
+ }
+
+ // Drop dead totals. In case the totals *are* used they could and should be
+ // sunk, but this happens in later passes so we don't bother doing it here.
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(Sums);
+
+ return true;
+}
+
/// Perform some cleanup and simplifications on loops after unrolling. It is
/// useful to simplify the IV's in the new loop, as well as do a quick
/// simplify/dce pass of the instructions.
@@ -272,6 +529,16 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
// have a phi which (potentially indirectly) uses instructions later in
// the block we're iterating through.
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ // Try to simplify reductions (e.g. chains of floating-point adds) into
+ // independent operations (see more at trySimplifyReductions). This is a
+ // very common pattern in unrolled loops that compute dot products (for
+ // example).
+ //
+ // We do this outside the loop over the instructions above to let
+ // instsimplify kick in before trying to apply this transform.
+ if (UnrollSimplifyReductions)
+ for (PHINode &PN : BB->phis())
+ trySimplifyReductions(PN);
}
}
diff --git a/llvm/test/CodeGen/AArch64/polybench-3mm.ll b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
index 034d7f44a95f41..309fa0e9a305f7 100644
--- a/llvm/test/CodeGen/AArch64/polybench-3mm.ll
+++ b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
@@ -14,21 +14,23 @@ define double @test(ptr %A, ptr %B, i64 %nb) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: lsl x8, x2, #4
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x10, x0, x9, lsl #3
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x1]
+; CHECK-NEXT: ldr d5, [x1, x2, lsl #3]
; CHECK-NEXT: add x9, x9, #2
-; CHECK-NEXT: cmp x9, #1000
-; CHECK-NEXT: ldp d2, d3, [x10]
-; CHECK-NEXT: fmadd d0, d1, d2, d0
-; CHECK-NEXT: ldr d1, [x1, x2, lsl #3]
; CHECK-NEXT: add x1, x1, x8
-; CHECK-NEXT: fmadd d0, d1, d3, d0
+; CHECK-NEXT: ldp d3, d4, [x10]
+; CHECK-NEXT: cmp x9, #1000
+; CHECK-NEXT: fmadd d0, d2, d3, d0
+; CHECK-NEXT: fmadd d1, d5, d4, d1
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll b/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
index 045b1c72321a97..874a4aa800c411 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
@@ -73,6 +73,13 @@ exit:
; NOHWPF-LABEL: loop2:
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
@@ -106,6 +113,13 @@ exit:
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
; NOHWPF-NEXT: br
; NOHWPF-NEXT-LABEL: exit2:
;
@@ -113,6 +127,9 @@ exit:
; CHECK-LABEL: loop2:
; CHECK-NEXT: phi
; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
@@ -130,6 +147,9 @@ exit:
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
; CHECK-NEXT: br
; CHECK-NEXT-LABEL: exit2:
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll b/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
index 216bf489bc66ec..59208a6da76f62 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
@@ -195,14 +195,15 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-V8-NEXT: br label [[LOOP:%.*]]
; CHECK-V8: loop:
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
-; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
-; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
+; CHECK-V8-NEXT: [[ACC_NEXT]] = add i32 [[UMAX]], [[ACC]]
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -212,14 +213,15 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
-; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
+; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
+; CHECK-V8-NEXT: [[ACC_RED:%.*]] = add i32 [[ACC_NEXT]], [[ACC_NEXT_1]]
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
-; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_RED]], [[LOOP]] ]
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
;
entry:
@@ -251,14 +253,15 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-V8-NEXT: br label [[LOOP:%.*]]
; CHECK-V8: loop:
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
-; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
-; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
+; CHECK-V8-NEXT: [[ACC_NEXT]] = add i32 [[UMAX]], [[ACC]]
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -268,14 +271,15 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
-; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
+; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
+; CHECK-V8-NEXT: [[ACC_RED:%.*]] = add i32 [[ACC_NEXT]], [[ACC_NEXT_1]]
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
-; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
+; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_RED]], [[LOOP]] ]
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
index 30389062a09678..7047e9147a57b2 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
@@ -10,523 +10,777 @@ define i32 @test(ptr %ary) "target-cpu"="znver3" {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_127:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM3:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM7:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM8:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_8:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM9:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM10:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_11:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM12:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_12:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM13:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM14:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_14:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM15:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_15:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM16:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_16:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM17:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_17:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM18:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_18:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM19:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_19:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM20:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_20:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM21:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_21:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM22:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_22:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM23:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_23:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM24:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_24:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM25:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_25:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM26:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_26:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM27:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM28:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM29:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_29:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM30:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_30:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM31:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_31:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM32:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_32:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM33:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_33:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM34:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_34:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM35:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_35:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM36:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_36:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM37:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_37:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM38:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_38:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM39:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_39:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM40:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_40:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM41:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_41:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM42:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_42:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM43:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_43:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM44:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_44:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM45:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_45:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM46:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_46:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM47:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_47:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM48:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_48:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM49:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_49:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM50:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_50:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM51:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_51:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM52:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_52:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM53:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_53:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM54:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_54:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM55:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_55:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM56:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_56:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM57:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_57:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM58:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_58:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM59:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_59:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM60:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_60:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM61:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_61:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM62:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_62:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM63:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_63:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM64:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_64:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM65:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_65:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM66:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_66:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM67:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_67:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM68:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_68:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM69:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_69:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM70:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_70:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM71:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_71:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM72:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_72:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM73:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_73:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM74:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_74:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM75:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_75:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM76:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_76:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM77:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_77:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM78:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_78:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM79:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_79:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM80:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_80:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM81:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_81:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM82:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_82:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM83:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_83:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM84:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_84:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM85:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_85:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM86:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_86:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM87:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_87:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM88:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_88:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM89:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_89:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM90:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_90:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM91:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_91:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM92:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_92:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM93:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_93:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM94:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_94:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM95:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_95:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM96:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_96:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM97:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_97:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM98:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_98:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM99:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_99:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM100:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_100:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM101:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_101:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM102:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_102:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM103:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_103:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM104:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_104:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM105:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_105:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM106:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_106:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM107:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_107:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM108:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_108:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM109:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_109:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM110:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_110:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM111:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_111:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM112:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_112:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM113:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_113:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM114:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_114:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM115:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_115:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM116:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_116:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM117:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_117:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM118:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_118:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM119:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_119:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM120:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_120:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM121:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_121:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM122:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_122:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM123:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_123:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM124:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_124:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM125:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_125:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM126:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_126:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM127:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[SUM_NEXT:%.*]] = add nsw i32 [[VAL]], [[SUM]]
+; CHECK-NEXT: [[SUM_NEXT]] = add nsw i32 [[VAL]], [[SUM]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[VAL_1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[SUM_NEXT_1:%.*]] = add nsw i32 [[VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_1]] = add nsw i32 [[VAL_1]], [[SUM1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[SUM_NEXT_2:%.*]] = add nsw i32 [[VAL_2]], [[SUM_NEXT_1]]
+; CHECK-NEXT: [[SUM_NEXT_2]] = add nsw i32 [[VAL_2]], [[SUM2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[SUM_NEXT_3:%.*]] = add nsw i32 [[VAL_3]], [[SUM_NEXT_2]]
+; CHECK-NEXT: [[SUM_NEXT_3]] = add nsw i32 [[VAL_3]], [[SUM3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[SUM_NEXT_4:%.*]] = add nsw i32 [[VAL_4]], [[SUM_NEXT_3]]
+; CHECK-NEXT: [[SUM_NEXT_4]] = add nsw i32 [[VAL_4]], [[SUM4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[SUM_NEXT_5:%.*]] = add nsw i32 [[VAL_5]], [[SUM_NEXT_4]]
+; CHECK-NEXT: [[SUM_NEXT_5]] = add nsw i32 [[VAL_5]], [[SUM5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[SUM_NEXT_6:%.*]] = add nsw i32 [[VAL_6]], [[SUM_NEXT_5]]
+; CHECK-NEXT: [[SUM_NEXT_6]] = add nsw i32 [[VAL_6]], [[SUM6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[SUM_NEXT_7:%.*]] = add nsw i32 [[VAL_7]], [[SUM_NEXT_6]]
+; CHECK-NEXT: [[SUM_NEXT_7]] = add nsw i32 [[VAL_7]], [[SUM7]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_7]]
; CHECK-NEXT: [[VAL_8:%.*]] = load i32, ptr [[ARRAYIDX_8]], align 4
-; CHECK-NEXT: [[SUM_NEXT_8:%.*]] = add nsw i32 [[VAL_8]], [[SUM_NEXT_7]]
+; CHECK-NEXT: [[SUM_NEXT_8]] = add nsw i32 [[VAL_8]], [[SUM8]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 9
; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_8]]
; CHECK-NEXT: [[VAL_9:%.*]] = load i32, ptr [[ARRAYIDX_9]], align 4
-; CHECK-NEXT: [[SUM_NEXT_9:%.*]] = add nsw i32 [[VAL_9]], [[SUM_NEXT_8]]
+; CHECK-NEXT: [[SUM_NEXT_9]] = add nsw i32 [[VAL_9]], [[SUM9]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 10
; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_9]]
; CHECK-NEXT: [[VAL_10:%.*]] = load i32, ptr [[ARRAYIDX_10]], align 4
-; CHECK-NEXT: [[SUM_NEXT_10:%.*]] = add nsw i32 [[VAL_10]], [[SUM_NEXT_9]]
+; CHECK-NEXT: [[SUM_NEXT_10]] = add nsw i32 [[VAL_10]], [[SUM10]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 11
; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_10]]
; CHECK-NEXT: [[VAL_11:%.*]] = load i32, ptr [[ARRAYIDX_11]], align 4
-; CHECK-NEXT: [[SUM_NEXT_11:%.*]] = add nsw i32 [[VAL_11]], [[SUM_NEXT_10]]
+; CHECK-NEXT: [[SUM_NEXT_11]] = add nsw i32 [[VAL_11]], [[SUM11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 12
; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_11]]
; CHECK-NEXT: [[VAL_12:%.*]] = load i32, ptr [[ARRAYIDX_12]], align 4
-; CHECK-NEXT: [[SUM_NEXT_12:%.*]] = add nsw i32 [[VAL_12]], [[SUM_NEXT_11]]
+; CHECK-NEXT: [[SUM_NEXT_12]] = add nsw i32 [[VAL_12]], [[SUM12]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 13
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_12]]
; CHECK-NEXT: [[VAL_13:%.*]] = load i32, ptr [[ARRAYIDX_13]], align 4
-; CHECK-NEXT: [[SUM_NEXT_13:%.*]] = add nsw i32 [[VAL_13]], [[SUM_NEXT_12]]
+; CHECK-NEXT: [[SUM_NEXT_13]] = add nsw i32 [[VAL_13]], [[SUM13]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 14
; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_13]]
; CHECK-NEXT: [[VAL_14:%.*]] = load i32, ptr [[ARRAYIDX_14]], align 4
-; CHECK-NEXT: [[SUM_NEXT_14:%.*]] = add nsw i32 [[VAL_14]], [[SUM_NEXT_13]]
+; CHECK-NEXT: [[SUM_NEXT_14]] = add nsw i32 [[VAL_14]], [[SUM14]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 15
; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_14]]
; CHECK-NEXT: [[VAL_15:%.*]] = load i32, ptr [[ARRAYIDX_15]], align 4
-; CHECK-NEXT: [[SUM_NEXT_15:%.*]] = add nsw i32 [[VAL_15]], [[SUM_NEXT_14]]
+; CHECK-NEXT: [[SUM_NEXT_15]] = add nsw i32 [[VAL_15]], [[SUM15]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 16
; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_15]]
; CHECK-NEXT: [[VAL_16:%.*]] = load i32, ptr [[ARRAYIDX_16]], align 4
-; CHECK-NEXT: [[SUM_NEXT_16:%.*]] = add nsw i32 [[VAL_16]], [[SUM_NEXT_15]]
+; CHECK-NEXT: [[SUM_NEXT_16]] = add nsw i32 [[VAL_16]], [[SUM16]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 17
; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_16]]
; CHECK-NEXT: [[VAL_17:%.*]] = load i32, ptr [[ARRAYIDX_17]], align 4
-; CHECK-NEXT: [[SUM_NEXT_17:%.*]] = add nsw i32 [[VAL_17]], [[SUM_NEXT_16]]
+; CHECK-NEXT: [[SUM_NEXT_17]] = add nsw i32 [[VAL_17]], [[SUM17]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 18
; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_17]]
; CHECK-NEXT: [[VAL_18:%.*]] = load i32, ptr [[ARRAYIDX_18]], align 4
-; CHECK-NEXT: [[SUM_NEXT_18:%.*]] = add nsw i32 [[VAL_18]], [[SUM_NEXT_17]]
+; CHECK-NEXT: [[SUM_NEXT_18]] = add nsw i32 [[VAL_18]], [[SUM18]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_18:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 19
; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_18]]
; CHECK-NEXT: [[VAL_19:%.*]] = load i32, ptr [[ARRAYIDX_19]], align 4
-; CHECK-NEXT: [[SUM_NEXT_19:%.*]] = add nsw i32 [[VAL_19]], [[SUM_NEXT_18]]
+; CHECK-NEXT: [[SUM_NEXT_19]] = add nsw i32 [[VAL_19]], [[SUM19]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_19:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 20
; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_19]]
; CHECK-NEXT: [[VAL_20:%.*]] = load i32, ptr [[ARRAYIDX_20]], align 4
-; CHECK-NEXT: [[SUM_NEXT_20:%.*]] = add nsw i32 [[VAL_20]], [[SUM_NEXT_19]]
+; CHECK-NEXT: [[SUM_NEXT_20]] = add nsw i32 [[VAL_20]], [[SUM20]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_20:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 21
; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_20]]
; CHECK-NEXT: [[VAL_21:%.*]] = load i32, ptr [[ARRAYIDX_21]], align 4
-; CHECK-NEXT: [[SUM_NEXT_21:%.*]] = add nsw i32 [[VAL_21]], [[SUM_NEXT_20]]
+; CHECK-NEXT: [[SUM_NEXT_21]] = add nsw i32 [[VAL_21]], [[SUM21]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 22
; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_21]]
; CHECK-NEXT: [[VAL_22:%.*]] = load i32, ptr [[ARRAYIDX_22]], align 4
-; CHECK-NEXT: [[SUM_NEXT_22:%.*]] = add nsw i32 [[VAL_22]], [[SUM_NEXT_21]]
+; CHECK-NEXT: [[SUM_NEXT_22]] = add nsw i32 [[VAL_22]], [[SUM22]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 23
; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_22]]
; CHECK-NEXT: [[VAL_23:%.*]] = load i32, ptr [[ARRAYIDX_23]], align 4
-; CHECK-NEXT: [[SUM_NEXT_23:%.*]] = add nsw i32 [[VAL_23]], [[SUM_NEXT_22]]
+; CHECK-NEXT: [[SUM_NEXT_23]] = add nsw i32 [[VAL_23]], [[SUM23]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 24
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_23]]
; CHECK-NEXT: [[VAL_24:%.*]] = load i32, ptr [[ARRAYIDX_24]], align 4
-; CHECK-NEXT: [[SUM_NEXT_24:%.*]] = add nsw i32 [[VAL_24]], [[SUM_NEXT_23]]
+; CHECK-NEXT: [[SUM_NEXT_24]] = add nsw i32 [[VAL_24]], [[SUM24]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_24:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 25
; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_24]]
; CHECK-NEXT: [[VAL_25:%.*]] = load i32, ptr [[ARRAYIDX_25]], align 4
-; CHECK-NEXT: [[SUM_NEXT_25:%.*]] = add nsw i32 [[VAL_25]], [[SUM_NEXT_24]]
+; CHECK-NEXT: [[SUM_NEXT_25]] = add nsw i32 [[VAL_25]], [[SUM25]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_25:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 26
; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_25]]
; CHECK-NEXT: [[VAL_26:%.*]] = load i32, ptr [[ARRAYIDX_26]], align 4
-; CHECK-NEXT: [[SUM_NEXT_26:%.*]] = add nsw i32 [[VAL_26]], [[SUM_NEXT_25]]
+; CHECK-NEXT: [[SUM_NEXT_26]] = add nsw i32 [[VAL_26]], [[SUM26]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_26:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 27
; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_26]]
; CHECK-NEXT: [[VAL_27:%.*]] = load i32, ptr [[ARRAYIDX_27]], align 4
-; CHECK-NEXT: [[SUM_NEXT_27:%.*]] = add nsw i32 [[VAL_27]], [[SUM_NEXT_26]]
+; CHECK-NEXT: [[SUM_NEXT_27]] = add nsw i32 [[VAL_27]], [[SUM27]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_27:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 28
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_27]]
; CHECK-NEXT: [[VAL_28:%.*]] = load i32, ptr [[ARRAYIDX_28]], align 4
-; CHECK-NEXT: [[SUM_NEXT_28:%.*]] = add nsw i32 [[VAL_28]], [[SUM_NEXT_27]]
+; CHECK-NEXT: [[SUM_NEXT_28]] = add nsw i32 [[VAL_28]], [[SUM28]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_28:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 29
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_28]]
; CHECK-NEXT: [[VAL_29:%.*]] = load i32, ptr [[ARRAYIDX_29]], align 4
-; CHECK-NEXT: [[SUM_NEXT_29:%.*]] = add nsw i32 [[VAL_29]], [[SUM_NEXT_28]]
+; CHECK-NEXT: [[SUM_NEXT_29]] = add nsw i32 [[VAL_29]], [[SUM29]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_29:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 30
; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_29]]
; CHECK-NEXT: [[VAL_30:%.*]] = load i32, ptr [[ARRAYIDX_30]], align 4
-; CHECK-NEXT: [[SUM_NEXT_30:%.*]] = add nsw i32 [[VAL_30]], [[SUM_NEXT_29]]
+; CHECK-NEXT: [[SUM_NEXT_30]] = add nsw i32 [[VAL_30]], [[SUM30]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_30:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 31
; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_30]]
; CHECK-NEXT: [[VAL_31:%.*]] = load i32, ptr [[ARRAYIDX_31]], align 4
-; CHECK-NEXT: [[SUM_NEXT_31:%.*]] = add nsw i32 [[VAL_31]], [[SUM_NEXT_30]]
+; CHECK-NEXT: [[SUM_NEXT_31]] = add nsw i32 [[VAL_31]], [[SUM31]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_31:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 32
; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_31]]
; CHECK-NEXT: [[VAL_32:%.*]] = load i32, ptr [[ARRAYIDX_32]], align 4
-; CHECK-NEXT: [[SUM_NEXT_32:%.*]] = add nsw i32 [[VAL_32]], [[SUM_NEXT_31]]
+; CHECK-NEXT: [[SUM_NEXT_32]] = add nsw i32 [[VAL_32]], [[SUM32]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_32:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 33
; CHECK-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_32]]
; CHECK-NEXT: [[VAL_33:%.*]] = load i32, ptr [[ARRAYIDX_33]], align 4
-; CHECK-NEXT: [[SUM_NEXT_33:%.*]] = add nsw i32 [[VAL_33]], [[SUM_NEXT_32]]
+; CHECK-NEXT: [[SUM_NEXT_33]] = add nsw i32 [[VAL_33]], [[SUM33]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_33:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 34
; CHECK-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_33]]
; CHECK-NEXT: [[VAL_34:%.*]] = load i32, ptr [[ARRAYIDX_34]], align 4
-; CHECK-NEXT: [[SUM_NEXT_34:%.*]] = add nsw i32 [[VAL_34]], [[SUM_NEXT_33]]
+; CHECK-NEXT: [[SUM_NEXT_34]] = add nsw i32 [[VAL_34]], [[SUM34]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_34:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 35
; CHECK-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_34]]
; CHECK-NEXT: [[VAL_35:%.*]] = load i32, ptr [[ARRAYIDX_35]], align 4
-; CHECK-NEXT: [[SUM_NEXT_35:%.*]] = add nsw i32 [[VAL_35]], [[SUM_NEXT_34]]
+; CHECK-NEXT: [[SUM_NEXT_35]] = add nsw i32 [[VAL_35]], [[SUM35]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_35:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 36
; CHECK-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_35]]
; CHECK-NEXT: [[VAL_36:%.*]] = load i32, ptr [[ARRAYIDX_36]], align 4
-; CHECK-NEXT: [[SUM_NEXT_36:%.*]] = add nsw i32 [[VAL_36]], [[SUM_NEXT_35]]
+; CHECK-NEXT: [[SUM_NEXT_36]] = add nsw i32 [[VAL_36]], [[SUM36]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_36:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 37
; CHECK-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_36]]
; CHECK-NEXT: [[VAL_37:%.*]] = load i32, ptr [[ARRAYIDX_37]], align 4
-; CHECK-NEXT: [[SUM_NEXT_37:%.*]] = add nsw i32 [[VAL_37]], [[SUM_NEXT_36]]
+; CHECK-NEXT: [[SUM_NEXT_37]] = add nsw i32 [[VAL_37]], [[SUM37]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_37:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 38
; CHECK-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_37]]
; CHECK-NEXT: [[VAL_38:%.*]] = load i32, ptr [[ARRAYIDX_38]], align 4
-; CHECK-NEXT: [[SUM_NEXT_38:%.*]] = add nsw i32 [[VAL_38]], [[SUM_NEXT_37]]
+; CHECK-NEXT: [[SUM_NEXT_38]] = add nsw i32 [[VAL_38]], [[SUM38]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_38:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 39
; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_38]]
; CHECK-NEXT: [[VAL_39:%.*]] = load i32, ptr [[ARRAYIDX_39]], align 4
-; CHECK-NEXT: [[SUM_NEXT_39:%.*]] = add nsw i32 [[VAL_39]], [[SUM_NEXT_38]]
+; CHECK-NEXT: [[SUM_NEXT_39]] = add nsw i32 [[VAL_39]], [[SUM39]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_39:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 40
; CHECK-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_39]]
; CHECK-NEXT: [[VAL_40:%.*]] = load i32, ptr [[ARRAYIDX_40]], align 4
-; CHECK-NEXT: [[SUM_NEXT_40:%.*]] = add nsw i32 [[VAL_40]], [[SUM_NEXT_39]]
+; CHECK-NEXT: [[SUM_NEXT_40]] = add nsw i32 [[VAL_40]], [[SUM40]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_40:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 41
; CHECK-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_40]]
; CHECK-NEXT: [[VAL_41:%.*]] = load i32, ptr [[ARRAYIDX_41]], align 4
-; CHECK-NEXT: [[SUM_NEXT_41:%.*]] = add nsw i32 [[VAL_41]], [[SUM_NEXT_40]]
+; CHECK-NEXT: [[SUM_NEXT_41]] = add nsw i32 [[VAL_41]], [[SUM41]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_41:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 42
; CHECK-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_41]]
; CHECK-NEXT: [[VAL_42:%.*]] = load i32, ptr [[ARRAYIDX_42]], align 4
-; CHECK-NEXT: [[SUM_NEXT_42:%.*]] = add nsw i32 [[VAL_42]], [[SUM_NEXT_41]]
+; CHECK-NEXT: [[SUM_NEXT_42]] = add nsw i32 [[VAL_42]], [[SUM42]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_42:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 43
; CHECK-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_42]]
; CHECK-NEXT: [[VAL_43:%.*]] = load i32, ptr [[ARRAYIDX_43]], align 4
-; CHECK-NEXT: [[SUM_NEXT_43:%.*]] = add nsw i32 [[VAL_43]], [[SUM_NEXT_42]]
+; CHECK-NEXT: [[SUM_NEXT_43]] = add nsw i32 [[VAL_43]], [[SUM43]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_43:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 44
; CHECK-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_43]]
; CHECK-NEXT: [[VAL_44:%.*]] = load i32, ptr [[ARRAYIDX_44]], align 4
-; CHECK-NEXT: [[SUM_NEXT_44:%.*]] = add nsw i32 [[VAL_44]], [[SUM_NEXT_43]]
+; CHECK-NEXT: [[SUM_NEXT_44]] = add nsw i32 [[VAL_44]], [[SUM44]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_44:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 45
; CHECK-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_44]]
; CHECK-NEXT: [[VAL_45:%.*]] = load i32, ptr [[ARRAYIDX_45]], align 4
-; CHECK-NEXT: [[SUM_NEXT_45:%.*]] = add nsw i32 [[VAL_45]], [[SUM_NEXT_44]]
+; CHECK-NEXT: [[SUM_NEXT_45]] = add nsw i32 [[VAL_45]], [[SUM45]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_45:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 46
; CHECK-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_45]]
; CHECK-NEXT: [[VAL_46:%.*]] = load i32, ptr [[ARRAYIDX_46]], align 4
-; CHECK-NEXT: [[SUM_NEXT_46:%.*]] = add nsw i32 [[VAL_46]], [[SUM_NEXT_45]]
+; CHECK-NEXT: [[SUM_NEXT_46]] = add nsw i32 [[VAL_46]], [[SUM46]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_46:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 47
; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_46]]
; CHECK-NEXT: [[VAL_47:%.*]] = load i32, ptr [[ARRAYIDX_47]], align 4
-; CHECK-NEXT: [[SUM_NEXT_47:%.*]] = add nsw i32 [[VAL_47]], [[SUM_NEXT_46]]
+; CHECK-NEXT: [[SUM_NEXT_47]] = add nsw i32 [[VAL_47]], [[SUM47]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_47:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 48
; CHECK-NEXT: [[ARRAYIDX_48:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_47]]
; CHECK-NEXT: [[VAL_48:%.*]] = load i32, ptr [[ARRAYIDX_48]], align 4
-; CHECK-NEXT: [[SUM_NEXT_48:%.*]] = add nsw i32 [[VAL_48]], [[SUM_NEXT_47]]
+; CHECK-NEXT: [[SUM_NEXT_48]] = add nsw i32 [[VAL_48]], [[SUM48]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_48:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 49
; CHECK-NEXT: [[ARRAYIDX_49:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_48]]
; CHECK-NEXT: [[VAL_49:%.*]] = load i32, ptr [[ARRAYIDX_49]], align 4
-; CHECK-NEXT: [[SUM_NEXT_49:%.*]] = add nsw i32 [[VAL_49]], [[SUM_NEXT_48]]
+; CHECK-NEXT: [[SUM_NEXT_49]] = add nsw i32 [[VAL_49]], [[SUM49]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_49:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 50
; CHECK-NEXT: [[ARRAYIDX_50:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_49]]
; CHECK-NEXT: [[VAL_50:%.*]] = load i32, ptr [[ARRAYIDX_50]], align 4
-; CHECK-NEXT: [[SUM_NEXT_50:%.*]] = add nsw i32 [[VAL_50]], [[SUM_NEXT_49]]
+; CHECK-NEXT: [[SUM_NEXT_50]] = add nsw i32 [[VAL_50]], [[SUM50]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_50:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 51
; CHECK-NEXT: [[ARRAYIDX_51:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_50]]
; CHECK-NEXT: [[VAL_51:%.*]] = load i32, ptr [[ARRAYIDX_51]], align 4
-; CHECK-NEXT: [[SUM_NEXT_51:%.*]] = add nsw i32 [[VAL_51]], [[SUM_NEXT_50]]
+; CHECK-NEXT: [[SUM_NEXT_51]] = add nsw i32 [[VAL_51]], [[SUM51]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_51:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 52
; CHECK-NEXT: [[ARRAYIDX_52:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_51]]
; CHECK-NEXT: [[VAL_52:%.*]] = load i32, ptr [[ARRAYIDX_52]], align 4
-; CHECK-NEXT: [[SUM_NEXT_52:%.*]] = add nsw i32 [[VAL_52]], [[SUM_NEXT_51]]
+; CHECK-NEXT: [[SUM_NEXT_52]] = add nsw i32 [[VAL_52]], [[SUM52]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_52:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 53
; CHECK-NEXT: [[ARRAYIDX_53:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_52]]
; CHECK-NEXT: [[VAL_53:%.*]] = load i32, ptr [[ARRAYIDX_53]], align 4
-; CHECK-NEXT: [[SUM_NEXT_53:%.*]] = add nsw i32 [[VAL_53]], [[SUM_NEXT_52]]
+; CHECK-NEXT: [[SUM_NEXT_53]] = add nsw i32 [[VAL_53]], [[SUM53]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_53:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 54
; CHECK-NEXT: [[ARRAYIDX_54:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_53]]
; CHECK-NEXT: [[VAL_54:%.*]] = load i32, ptr [[ARRAYIDX_54]], align 4
-; CHECK-NEXT: [[SUM_NEXT_54:%.*]] = add nsw i32 [[VAL_54]], [[SUM_NEXT_53]]
+; CHECK-NEXT: [[SUM_NEXT_54]] = add nsw i32 [[VAL_54]], [[SUM54]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_54:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 55
; CHECK-NEXT: [[ARRAYIDX_55:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_54]]
; CHECK-NEXT: [[VAL_55:%.*]] = load i32, ptr [[ARRAYIDX_55]], align 4
-; CHECK-NEXT: [[SUM_NEXT_55:%.*]] = add nsw i32 [[VAL_55]], [[SUM_NEXT_54]]
+; CHECK-NEXT: [[SUM_NEXT_55]] = add nsw i32 [[VAL_55]], [[SUM55]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_55:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 56
; CHECK-NEXT: [[ARRAYIDX_56:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_55]]
; CHECK-NEXT: [[VAL_56:%.*]] = load i32, ptr [[ARRAYIDX_56]], align 4
-; CHECK-NEXT: [[SUM_NEXT_56:%.*]] = add nsw i32 [[VAL_56]], [[SUM_NEXT_55]]
+; CHECK-NEXT: [[SUM_NEXT_56]] = add nsw i32 [[VAL_56]], [[SUM56]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_56:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 57
; CHECK-NEXT: [[ARRAYIDX_57:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_56]]
; CHECK-NEXT: [[VAL_57:%.*]] = load i32, ptr [[ARRAYIDX_57]], align 4
-; CHECK-NEXT: [[SUM_NEXT_57:%.*]] = add nsw i32 [[VAL_57]], [[SUM_NEXT_56]]
+; CHECK-NEXT: [[SUM_NEXT_57]] = add nsw i32 [[VAL_57]], [[SUM57]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_57:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 58
; CHECK-NEXT: [[ARRAYIDX_58:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_57]]
; CHECK-NEXT: [[VAL_58:%.*]] = load i32, ptr [[ARRAYIDX_58]], align 4
-; CHECK-NEXT: [[SUM_NEXT_58:%.*]] = add nsw i32 [[VAL_58]], [[SUM_NEXT_57]]
+; CHECK-NEXT: [[SUM_NEXT_58]] = add nsw i32 [[VAL_58]], [[SUM58]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_58:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 59
; CHECK-NEXT: [[ARRAYIDX_59:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_58]]
; CHECK-NEXT: [[VAL_59:%.*]] = load i32, ptr [[ARRAYIDX_59]], align 4
-; CHECK-NEXT: [[SUM_NEXT_59:%.*]] = add nsw i32 [[VAL_59]], [[SUM_NEXT_58]]
+; CHECK-NEXT: [[SUM_NEXT_59]] = add nsw i32 [[VAL_59]], [[SUM59]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_59:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 60
; CHECK-NEXT: [[ARRAYIDX_60:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_59]]
; CHECK-NEXT: [[VAL_60:%.*]] = load i32, ptr [[ARRAYIDX_60]], align 4
-; CHECK-NEXT: [[SUM_NEXT_60:%.*]] = add nsw i32 [[VAL_60]], [[SUM_NEXT_59]]
+; CHECK-NEXT: [[SUM_NEXT_60]] = add nsw i32 [[VAL_60]], [[SUM60]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_60:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 61
; CHECK-NEXT: [[ARRAYIDX_61:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_60]]
; CHECK-NEXT: [[VAL_61:%.*]] = load i32, ptr [[ARRAYIDX_61]], align 4
-; CHECK-NEXT: [[SUM_NEXT_61:%.*]] = add nsw i32 [[VAL_61]], [[SUM_NEXT_60]]
+; CHECK-NEXT: [[SUM_NEXT_61]] = add nsw i32 [[VAL_61]], [[SUM61]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_61:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 62
; CHECK-NEXT: [[ARRAYIDX_62:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_61]]
; CHECK-NEXT: [[VAL_62:%.*]] = load i32, ptr [[ARRAYIDX_62]], align 4
-; CHECK-NEXT: [[SUM_NEXT_62:%.*]] = add nsw i32 [[VAL_62]], [[SUM_NEXT_61]]
+; CHECK-NEXT: [[SUM_NEXT_62]] = add nsw i32 [[VAL_62]], [[SUM62]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_62:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 63
; CHECK-NEXT: [[ARRAYIDX_63:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_62]]
; CHECK-NEXT: [[VAL_63:%.*]] = load i32, ptr [[ARRAYIDX_63]], align 4
-; CHECK-NEXT: [[SUM_NEXT_63:%.*]] = add nsw i32 [[VAL_63]], [[SUM_NEXT_62]]
+; CHECK-NEXT: [[SUM_NEXT_63]] = add nsw i32 [[VAL_63]], [[SUM63]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_63:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 64
; CHECK-NEXT: [[ARRAYIDX_64:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_63]]
; CHECK-NEXT: [[VAL_64:%.*]] = load i32, ptr [[ARRAYIDX_64]], align 4
-; CHECK-NEXT: [[SUM_NEXT_64:%.*]] = add nsw i32 [[VAL_64]], [[SUM_NEXT_63]]
+; CHECK-NEXT: [[SUM_NEXT_64]] = add nsw i32 [[VAL_64]], [[SUM64]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_64:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 65
; CHECK-NEXT: [[ARRAYIDX_65:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_64]]
; CHECK-NEXT: [[VAL_65:%.*]] = load i32, ptr [[ARRAYIDX_65]], align 4
-; CHECK-NEXT: [[SUM_NEXT_65:%.*]] = add nsw i32 [[VAL_65]], [[SUM_NEXT_64]]
+; CHECK-NEXT: [[SUM_NEXT_65]] = add nsw i32 [[VAL_65]], [[SUM65]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_65:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 66
; CHECK-NEXT: [[ARRAYIDX_66:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_65]]
; CHECK-NEXT: [[VAL_66:%.*]] = load i32, ptr [[ARRAYIDX_66]], align 4
-; CHECK-NEXT: [[SUM_NEXT_66:%.*]] = add nsw i32 [[VAL_66]], [[SUM_NEXT_65]]
+; CHECK-NEXT: [[SUM_NEXT_66]] = add nsw i32 [[VAL_66]], [[SUM66]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_66:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 67
; CHECK-NEXT: [[ARRAYIDX_67:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_66]]
; CHECK-NEXT: [[VAL_67:%.*]] = load i32, ptr [[ARRAYIDX_67]], align 4
-; CHECK-NEXT: [[SUM_NEXT_67:%.*]] = add nsw i32 [[VAL_67]], [[SUM_NEXT_66]]
+; CHECK-NEXT: [[SUM_NEXT_67]] = add nsw i32 [[VAL_67]], [[SUM67]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_67:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 68
; CHECK-NEXT: [[ARRAYIDX_68:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_67]]
; CHECK-NEXT: [[VAL_68:%.*]] = load i32, ptr [[ARRAYIDX_68]], align 4
-; CHECK-NEXT: [[SUM_NEXT_68:%.*]] = add nsw i32 [[VAL_68]], [[SUM_NEXT_67]]
+; CHECK-NEXT: [[SUM_NEXT_68]] = add nsw i32 [[VAL_68]], [[SUM68]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_68:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 69
; CHECK-NEXT: [[ARRAYIDX_69:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_68]]
; CHECK-NEXT: [[VAL_69:%.*]] = load i32, ptr [[ARRAYIDX_69]], align 4
-; CHECK-NEXT: [[SUM_NEXT_69:%.*]] = add nsw i32 [[VAL_69]], [[SUM_NEXT_68]]
+; CHECK-NEXT: [[SUM_NEXT_69]] = add nsw i32 [[VAL_69]], [[SUM69]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_69:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 70
; CHECK-NEXT: [[ARRAYIDX_70:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_69]]
; CHECK-NEXT: [[VAL_70:%.*]] = load i32, ptr [[ARRAYIDX_70]], align 4
-; CHECK-NEXT: [[SUM_NEXT_70:%.*]] = add nsw i32 [[VAL_70]], [[SUM_NEXT_69]]
+; CHECK-NEXT: [[SUM_NEXT_70]] = add nsw i32 [[VAL_70]], [[SUM70]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_70:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 71
; CHECK-NEXT: [[ARRAYIDX_71:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_70]]
; CHECK-NEXT: [[VAL_71:%.*]] = load i32, ptr [[ARRAYIDX_71]], align 4
-; CHECK-NEXT: [[SUM_NEXT_71:%.*]] = add nsw i32 [[VAL_71]], [[SUM_NEXT_70]]
+; CHECK-NEXT: [[SUM_NEXT_71]] = add nsw i32 [[VAL_71]], [[SUM71]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_71:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 72
; CHECK-NEXT: [[ARRAYIDX_72:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_71]]
; CHECK-NEXT: [[VAL_72:%.*]] = load i32, ptr [[ARRAYIDX_72]], align 4
-; CHECK-NEXT: [[SUM_NEXT_72:%.*]] = add nsw i32 [[VAL_72]], [[SUM_NEXT_71]]
+; CHECK-NEXT: [[SUM_NEXT_72]] = add nsw i32 [[VAL_72]], [[SUM72]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_72:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 73
; CHECK-NEXT: [[ARRAYIDX_73:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_72]]
; CHECK-NEXT: [[VAL_73:%.*]] = load i32, ptr [[ARRAYIDX_73]], align 4
-; CHECK-NEXT: [[SUM_NEXT_73:%.*]] = add nsw i32 [[VAL_73]], [[SUM_NEXT_72]]
+; CHECK-NEXT: [[SUM_NEXT_73]] = add nsw i32 [[VAL_73]], [[SUM73]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_73:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 74
; CHECK-NEXT: [[ARRAYIDX_74:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_73]]
; CHECK-NEXT: [[VAL_74:%.*]] = load i32, ptr [[ARRAYIDX_74]], align 4
-; CHECK-NEXT: [[SUM_NEXT_74:%.*]] = add nsw i32 [[VAL_74]], [[SUM_NEXT_73]]
+; CHECK-NEXT: [[SUM_NEXT_74]] = add nsw i32 [[VAL_74]], [[SUM74]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_74:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 75
; CHECK-NEXT: [[ARRAYIDX_75:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_74]]
; CHECK-NEXT: [[VAL_75:%.*]] = load i32, ptr [[ARRAYIDX_75]], align 4
-; CHECK-NEXT: [[SUM_NEXT_75:%.*]] = add nsw i32 [[VAL_75]], [[SUM_NEXT_74]]
+; CHECK-NEXT: [[SUM_NEXT_75]] = add nsw i32 [[VAL_75]], [[SUM75]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_75:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 76
; CHECK-NEXT: [[ARRAYIDX_76:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_75]]
; CHECK-NEXT: [[VAL_76:%.*]] = load i32, ptr [[ARRAYIDX_76]], align 4
-; CHECK-NEXT: [[SUM_NEXT_76:%.*]] = add nsw i32 [[VAL_76]], [[SUM_NEXT_75]]
+; CHECK-NEXT: [[SUM_NEXT_76]] = add nsw i32 [[VAL_76]], [[SUM76]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_76:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 77
; CHECK-NEXT: [[ARRAYIDX_77:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_76]]
; CHECK-NEXT: [[VAL_77:%.*]] = load i32, ptr [[ARRAYIDX_77]], align 4
-; CHECK-NEXT: [[SUM_NEXT_77:%.*]] = add nsw i32 [[VAL_77]], [[SUM_NEXT_76]]
+; CHECK-NEXT: [[SUM_NEXT_77]] = add nsw i32 [[VAL_77]], [[SUM77]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_77:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 78
; CHECK-NEXT: [[ARRAYIDX_78:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_77]]
; CHECK-NEXT: [[VAL_78:%.*]] = load i32, ptr [[ARRAYIDX_78]], align 4
-; CHECK-NEXT: [[SUM_NEXT_78:%.*]] = add nsw i32 [[VAL_78]], [[SUM_NEXT_77]]
+; CHECK-NEXT: [[SUM_NEXT_78]] = add nsw i32 [[VAL_78]], [[SUM78]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_78:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 79
; CHECK-NEXT: [[ARRAYIDX_79:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_78]]
; CHECK-NEXT: [[VAL_79:%.*]] = load i32, ptr [[ARRAYIDX_79]], align 4
-; CHECK-NEXT: [[SUM_NEXT_79:%.*]] = add nsw i32 [[VAL_79]], [[SUM_NEXT_78]]
+; CHECK-NEXT: [[SUM_NEXT_79]] = add nsw i32 [[VAL_79]], [[SUM79]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_79:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 80
; CHECK-NEXT: [[ARRAYIDX_80:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_79]]
; CHECK-NEXT: [[VAL_80:%.*]] = load i32, ptr [[ARRAYIDX_80]], align 4
-; CHECK-NEXT: [[SUM_NEXT_80:%.*]] = add nsw i32 [[VAL_80]], [[SUM_NEXT_79]]
+; CHECK-NEXT: [[SUM_NEXT_80]] = add nsw i32 [[VAL_80]], [[SUM80]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_80:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 81
; CHECK-NEXT: [[ARRAYIDX_81:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_80]]
; CHECK-NEXT: [[VAL_81:%.*]] = load i32, ptr [[ARRAYIDX_81]], align 4
-; CHECK-NEXT: [[SUM_NEXT_81:%.*]] = add nsw i32 [[VAL_81]], [[SUM_NEXT_80]]
+; CHECK-NEXT: [[SUM_NEXT_81]] = add nsw i32 [[VAL_81]], [[SUM81]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_81:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 82
; CHECK-NEXT: [[ARRAYIDX_82:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_81]]
; CHECK-NEXT: [[VAL_82:%.*]] = load i32, ptr [[ARRAYIDX_82]], align 4
-; CHECK-NEXT: [[SUM_NEXT_82:%.*]] = add nsw i32 [[VAL_82]], [[SUM_NEXT_81]]
+; CHECK-NEXT: [[SUM_NEXT_82]] = add nsw i32 [[VAL_82]], [[SUM82]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_82:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 83
; CHECK-NEXT: [[ARRAYIDX_83:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_82]]
; CHECK-NEXT: [[VAL_83:%.*]] = load i32, ptr [[ARRAYIDX_83]], align 4
-; CHECK-NEXT: [[SUM_NEXT_83:%.*]] = add nsw i32 [[VAL_83]], [[SUM_NEXT_82]]
+; CHECK-NEXT: [[SUM_NEXT_83]] = add nsw i32 [[VAL_83]], [[SUM83]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_83:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 84
; CHECK-NEXT: [[ARRAYIDX_84:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_83]]
; CHECK-NEXT: [[VAL_84:%.*]] = load i32, ptr [[ARRAYIDX_84]], align 4
-; CHECK-NEXT: [[SUM_NEXT_84:%.*]] = add nsw i32 [[VAL_84]], [[SUM_NEXT_83]]
+; CHECK-NEXT: [[SUM_NEXT_84]] = add nsw i32 [[VAL_84]], [[SUM84]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_84:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 85
; CHECK-NEXT: [[ARRAYIDX_85:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_84]]
; CHECK-NEXT: [[VAL_85:%.*]] = load i32, ptr [[ARRAYIDX_85]], align 4
-; CHECK-NEXT: [[SUM_NEXT_85:%.*]] = add nsw i32 [[VAL_85]], [[SUM_NEXT_84]]
+; CHECK-NEXT: [[SUM_NEXT_85]] = add nsw i32 [[VAL_85]], [[SUM85]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_85:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 86
; CHECK-NEXT: [[ARRAYIDX_86:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_85]]
; CHECK-NEXT: [[VAL_86:%.*]] = load i32, ptr [[ARRAYIDX_86]], align 4
-; CHECK-NEXT: [[SUM_NEXT_86:%.*]] = add nsw i32 [[VAL_86]], [[SUM_NEXT_85]]
+; CHECK-NEXT: [[SUM_NEXT_86]] = add nsw i32 [[VAL_86]], [[SUM86]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_86:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 87
; CHECK-NEXT: [[ARRAYIDX_87:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_86]]
; CHECK-NEXT: [[VAL_87:%.*]] = load i32, ptr [[ARRAYIDX_87]], align 4
-; CHECK-NEXT: [[SUM_NEXT_87:%.*]] = add nsw i32 [[VAL_87]], [[SUM_NEXT_86]]
+; CHECK-NEXT: [[SUM_NEXT_87]] = add nsw i32 [[VAL_87]], [[SUM87]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_87:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 88
; CHECK-NEXT: [[ARRAYIDX_88:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_87]]
; CHECK-NEXT: [[VAL_88:%.*]] = load i32, ptr [[ARRAYIDX_88]], align 4
-; CHECK-NEXT: [[SUM_NEXT_88:%.*]] = add nsw i32 [[VAL_88]], [[SUM_NEXT_87]]
+; CHECK-NEXT: [[SUM_NEXT_88]] = add nsw i32 [[VAL_88]], [[SUM88]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_88:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 89
; CHECK-NEXT: [[ARRAYIDX_89:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_88]]
; CHECK-NEXT: [[VAL_89:%.*]] = load i32, ptr [[ARRAYIDX_89]], align 4
-; CHECK-NEXT: [[SUM_NEXT_89:%.*]] = add nsw i32 [[VAL_89]], [[SUM_NEXT_88]]
+; CHECK-NEXT: [[SUM_NEXT_89]] = add nsw i32 [[VAL_89]], [[SUM89]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_89:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 90
; CHECK-NEXT: [[ARRAYIDX_90:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_89]]
; CHECK-NEXT: [[VAL_90:%.*]] = load i32, ptr [[ARRAYIDX_90]], align 4
-; CHECK-NEXT: [[SUM_NEXT_90:%.*]] = add nsw i32 [[VAL_90]], [[SUM_NEXT_89]]
+; CHECK-NEXT: [[SUM_NEXT_90]] = add nsw i32 [[VAL_90]], [[SUM90]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_90:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 91
; CHECK-NEXT: [[ARRAYIDX_91:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_90]]
; CHECK-NEXT: [[VAL_91:%.*]] = load i32, ptr [[ARRAYIDX_91]], align 4
-; CHECK-NEXT: [[SUM_NEXT_91:%.*]] = add nsw i32 [[VAL_91]], [[SUM_NEXT_90]]
+; CHECK-NEXT: [[SUM_NEXT_91]] = add nsw i32 [[VAL_91]], [[SUM91]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_91:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 92
; CHECK-NEXT: [[ARRAYIDX_92:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_91]]
; CHECK-NEXT: [[VAL_92:%.*]] = load i32, ptr [[ARRAYIDX_92]], align 4
-; CHECK-NEXT: [[SUM_NEXT_92:%.*]] = add nsw i32 [[VAL_92]], [[SUM_NEXT_91]]
+; CHECK-NEXT: [[SUM_NEXT_92]] = add nsw i32 [[VAL_92]], [[SUM92]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_92:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 93
; CHECK-NEXT: [[ARRAYIDX_93:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_92]]
; CHECK-NEXT: [[VAL_93:%.*]] = load i32, ptr [[ARRAYIDX_93]], align 4
-; CHECK-NEXT: [[SUM_NEXT_93:%.*]] = add nsw i32 [[VAL_93]], [[SUM_NEXT_92]]
+; CHECK-NEXT: [[SUM_NEXT_93]] = add nsw i32 [[VAL_93]], [[SUM93]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_93:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 94
; CHECK-NEXT: [[ARRAYIDX_94:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_93]]
; CHECK-NEXT: [[VAL_94:%.*]] = load i32, ptr [[ARRAYIDX_94]], align 4
-; CHECK-NEXT: [[SUM_NEXT_94:%.*]] = add nsw i32 [[VAL_94]], [[SUM_NEXT_93]]
+; CHECK-NEXT: [[SUM_NEXT_94]] = add nsw i32 [[VAL_94]], [[SUM94]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_94:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 95
; CHECK-NEXT: [[ARRAYIDX_95:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_94]]
; CHECK-NEXT: [[VAL_95:%.*]] = load i32, ptr [[ARRAYIDX_95]], align 4
-; CHECK-NEXT: [[SUM_NEXT_95:%.*]] = add nsw i32 [[VAL_95]], [[SUM_NEXT_94]]
+; CHECK-NEXT: [[SUM_NEXT_95]] = add nsw i32 [[VAL_95]], [[SUM95]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_95:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 96
; CHECK-NEXT: [[ARRAYIDX_96:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_95]]
; CHECK-NEXT: [[VAL_96:%.*]] = load i32, ptr [[ARRAYIDX_96]], align 4
-; CHECK-NEXT: [[SUM_NEXT_96:%.*]] = add nsw i32 [[VAL_96]], [[SUM_NEXT_95]]
+; CHECK-NEXT: [[SUM_NEXT_96]] = add nsw i32 [[VAL_96]], [[SUM96]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_96:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 97
; CHECK-NEXT: [[ARRAYIDX_97:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_96]]
; CHECK-NEXT: [[VAL_97:%.*]] = load i32, ptr [[ARRAYIDX_97]], align 4
-; CHECK-NEXT: [[SUM_NEXT_97:%.*]] = add nsw i32 [[VAL_97]], [[SUM_NEXT_96]]
+; CHECK-NEXT: [[SUM_NEXT_97]] = add nsw i32 [[VAL_97]], [[SUM97]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_97:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 98
; CHECK-NEXT: [[ARRAYIDX_98:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_97]]
; CHECK-NEXT: [[VAL_98:%.*]] = load i32, ptr [[ARRAYIDX_98]], align 4
-; CHECK-NEXT: [[SUM_NEXT_98:%.*]] = add nsw i32 [[VAL_98]], [[SUM_NEXT_97]]
+; CHECK-NEXT: [[SUM_NEXT_98]] = add nsw i32 [[VAL_98]], [[SUM98]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_98:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 99
; CHECK-NEXT: [[ARRAYIDX_99:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_98]]
; CHECK-NEXT: [[VAL_99:%.*]] = load i32, ptr [[ARRAYIDX_99]], align 4
-; CHECK-NEXT: [[SUM_NEXT_99:%.*]] = add nsw i32 [[VAL_99]], [[SUM_NEXT_98]]
+; CHECK-NEXT: [[SUM_NEXT_99]] = add nsw i32 [[VAL_99]], [[SUM99]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_99:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 100
; CHECK-NEXT: [[ARRAYIDX_100:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_99]]
; CHECK-NEXT: [[VAL_100:%.*]] = load i32, ptr [[ARRAYIDX_100]], align 4
-; CHECK-NEXT: [[SUM_NEXT_100:%.*]] = add nsw i32 [[VAL_100]], [[SUM_NEXT_99]]
+; CHECK-NEXT: [[SUM_NEXT_100]] = add nsw i32 [[VAL_100]], [[SUM100]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_100:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 101
; CHECK-NEXT: [[ARRAYIDX_101:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_100]]
; CHECK-NEXT: [[VAL_101:%.*]] = load i32, ptr [[ARRAYIDX_101]], align 4
-; CHECK-NEXT: [[SUM_NEXT_101:%.*]] = add nsw i32 [[VAL_101]], [[SUM_NEXT_100]]
+; CHECK-NEXT: [[SUM_NEXT_101]] = add nsw i32 [[VAL_101]], [[SUM101]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_101:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 102
; CHECK-NEXT: [[ARRAYIDX_102:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_101]]
; CHECK-NEXT: [[VAL_102:%.*]] = load i32, ptr [[ARRAYIDX_102]], align 4
-; CHECK-NEXT: [[SUM_NEXT_102:%.*]] = add nsw i32 [[VAL_102]], [[SUM_NEXT_101]]
+; CHECK-NEXT: [[SUM_NEXT_102]] = add nsw i32 [[VAL_102]], [[SUM102]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_102:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 103
; CHECK-NEXT: [[ARRAYIDX_103:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_102]]
; CHECK-NEXT: [[VAL_103:%.*]] = load i32, ptr [[ARRAYIDX_103]], align 4
-; CHECK-NEXT: [[SUM_NEXT_103:%.*]] = add nsw i32 [[VAL_103]], [[SUM_NEXT_102]]
+; CHECK-NEXT: [[SUM_NEXT_103]] = add nsw i32 [[VAL_103]], [[SUM103]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_103:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 104
; CHECK-NEXT: [[ARRAYIDX_104:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_103]]
; CHECK-NEXT: [[VAL_104:%.*]] = load i32, ptr [[ARRAYIDX_104]], align 4
-; CHECK-NEXT: [[SUM_NEXT_104:%.*]] = add nsw i32 [[VAL_104]], [[SUM_NEXT_103]]
+; CHECK-NEXT: [[SUM_NEXT_104]] = add nsw i32 [[VAL_104]], [[SUM104]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_104:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 105
; CHECK-NEXT: [[ARRAYIDX_105:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_104]]
; CHECK-NEXT: [[VAL_105:%.*]] = load i32, ptr [[ARRAYIDX_105]], align 4
-; CHECK-NEXT: [[SUM_NEXT_105:%.*]] = add nsw i32 [[VAL_105]], [[SUM_NEXT_104]]
+; CHECK-NEXT: [[SUM_NEXT_105]] = add nsw i32 [[VAL_105]], [[SUM105]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_105:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 106
; CHECK-NEXT: [[ARRAYIDX_106:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_105]]
; CHECK-NEXT: [[VAL_106:%.*]] = load i32, ptr [[ARRAYIDX_106]], align 4
-; CHECK-NEXT: [[SUM_NEXT_106:%.*]] = add nsw i32 [[VAL_106]], [[SUM_NEXT_105]]
+; CHECK-NEXT: [[SUM_NEXT_106]] = add nsw i32 [[VAL_106]], [[SUM106]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_106:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 107
; CHECK-NEXT: [[ARRAYIDX_107:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_106]]
; CHECK-NEXT: [[VAL_107:%.*]] = load i32, ptr [[ARRAYIDX_107]], align 4
-; CHECK-NEXT: [[SUM_NEXT_107:%.*]] = add nsw i32 [[VAL_107]], [[SUM_NEXT_106]]
+; CHECK-NEXT: [[SUM_NEXT_107]] = add nsw i32 [[VAL_107]], [[SUM107]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_107:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 108
; CHECK-NEXT: [[ARRAYIDX_108:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_107]]
; CHECK-NEXT: [[VAL_108:%.*]] = load i32, ptr [[ARRAYIDX_108]], align 4
-; CHECK-NEXT: [[SUM_NEXT_108:%.*]] = add nsw i32 [[VAL_108]], [[SUM_NEXT_107]]
+; CHECK-NEXT: [[SUM_NEXT_108]] = add nsw i32 [[VAL_108]], [[SUM108]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_108:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 109
; CHECK-NEXT: [[ARRAYIDX_109:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_108]]
; CHECK-NEXT: [[VAL_109:%.*]] = load i32, ptr [[ARRAYIDX_109]], align 4
-; CHECK-NEXT: [[SUM_NEXT_109:%.*]] = add nsw i32 [[VAL_109]], [[SUM_NEXT_108]]
+; CHECK-NEXT: [[SUM_NEXT_109]] = add nsw i32 [[VAL_109]], [[SUM109]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_109:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 110
; CHECK-NEXT: [[ARRAYIDX_110:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_109]]
; CHECK-NEXT: [[VAL_110:%.*]] = load i32, ptr [[ARRAYIDX_110]], align 4
-; CHECK-NEXT: [[SUM_NEXT_110:%.*]] = add nsw i32 [[VAL_110]], [[SUM_NEXT_109]]
+; CHECK-NEXT: [[SUM_NEXT_110]] = add nsw i32 [[VAL_110]], [[SUM110]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_110:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 111
; CHECK-NEXT: [[ARRAYIDX_111:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_110]]
; CHECK-NEXT: [[VAL_111:%.*]] = load i32, ptr [[ARRAYIDX_111]], align 4
-; CHECK-NEXT: [[SUM_NEXT_111:%.*]] = add nsw i32 [[VAL_111]], [[SUM_NEXT_110]]
+; CHECK-NEXT: [[SUM_NEXT_111]] = add nsw i32 [[VAL_111]], [[SUM111]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_111:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 112
; CHECK-NEXT: [[ARRAYIDX_112:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_111]]
; CHECK-NEXT: [[VAL_112:%.*]] = load i32, ptr [[ARRAYIDX_112]], align 4
-; CHECK-NEXT: [[SUM_NEXT_112:%.*]] = add nsw i32 [[VAL_112]], [[SUM_NEXT_111]]
+; CHECK-NEXT: [[SUM_NEXT_112]] = add nsw i32 [[VAL_112]], [[SUM112]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_112:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 113
; CHECK-NEXT: [[ARRAYIDX_113:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_112]]
; CHECK-NEXT: [[VAL_113:%.*]] = load i32, ptr [[ARRAYIDX_113]], align 4
-; CHECK-NEXT: [[SUM_NEXT_113:%.*]] = add nsw i32 [[VAL_113]], [[SUM_NEXT_112]]
+; CHECK-NEXT: [[SUM_NEXT_113]] = add nsw i32 [[VAL_113]], [[SUM113]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_113:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 114
; CHECK-NEXT: [[ARRAYIDX_114:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_113]]
; CHECK-NEXT: [[VAL_114:%.*]] = load i32, ptr [[ARRAYIDX_114]], align 4
-; CHECK-NEXT: [[SUM_NEXT_114:%.*]] = add nsw i32 [[VAL_114]], [[SUM_NEXT_113]]
+; CHECK-NEXT: [[SUM_NEXT_114]] = add nsw i32 [[VAL_114]], [[SUM114]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_114:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 115
; CHECK-NEXT: [[ARRAYIDX_115:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_114]]
; CHECK-NEXT: [[VAL_115:%.*]] = load i32, ptr [[ARRAYIDX_115]], align 4
-; CHECK-NEXT: [[SUM_NEXT_115:%.*]] = add nsw i32 [[VAL_115]], [[SUM_NEXT_114]]
+; CHECK-NEXT: [[SUM_NEXT_115]] = add nsw i32 [[VAL_115]], [[SUM115]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_115:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 116
; CHECK-NEXT: [[ARRAYIDX_116:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_115]]
; CHECK-NEXT: [[VAL_116:%.*]] = load i32, ptr [[ARRAYIDX_116]], align 4
-; CHECK-NEXT: [[SUM_NEXT_116:%.*]] = add nsw i32 [[VAL_116]], [[SUM_NEXT_115]]
+; CHECK-NEXT: [[SUM_NEXT_116]] = add nsw i32 [[VAL_116]], [[SUM116]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_116:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 117
; CHECK-NEXT: [[ARRAYIDX_117:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_116]]
; CHECK-NEXT: [[VAL_117:%.*]] = load i32, ptr [[ARRAYIDX_117]], align 4
-; CHECK-NEXT: [[SUM_NEXT_117:%.*]] = add nsw i32 [[VAL_117]], [[SUM_NEXT_116]]
+; CHECK-NEXT: [[SUM_NEXT_117]] = add nsw i32 [[VAL_117]], [[SUM117]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_117:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 118
; CHECK-NEXT: [[ARRAYIDX_118:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_117]]
; CHECK-NEXT: [[VAL_118:%.*]] = load i32, ptr [[ARRAYIDX_118]], align 4
-; CHECK-NEXT: [[SUM_NEXT_118:%.*]] = add nsw i32 [[VAL_118]], [[SUM_NEXT_117]]
+; CHECK-NEXT: [[SUM_NEXT_118]] = add nsw i32 [[VAL_118]], [[SUM118]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_118:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 119
; CHECK-NEXT: [[ARRAYIDX_119:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_118]]
; CHECK-NEXT: [[VAL_119:%.*]] = load i32, ptr [[ARRAYIDX_119]], align 4
-; CHECK-NEXT: [[SUM_NEXT_119:%.*]] = add nsw i32 [[VAL_119]], [[SUM_NEXT_118]]
+; CHECK-NEXT: [[SUM_NEXT_119]] = add nsw i32 [[VAL_119]], [[SUM119]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_119:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 120
; CHECK-NEXT: [[ARRAYIDX_120:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_119]]
; CHECK-NEXT: [[VAL_120:%.*]] = load i32, ptr [[ARRAYIDX_120]], align 4
-; CHECK-NEXT: [[SUM_NEXT_120:%.*]] = add nsw i32 [[VAL_120]], [[SUM_NEXT_119]]
+; CHECK-NEXT: [[SUM_NEXT_120]] = add nsw i32 [[VAL_120]], [[SUM120]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_120:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 121
; CHECK-NEXT: [[ARRAYIDX_121:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_120]]
; CHECK-NEXT: [[VAL_121:%.*]] = load i32, ptr [[ARRAYIDX_121]], align 4
-; CHECK-NEXT: [[SUM_NEXT_121:%.*]] = add nsw i32 [[VAL_121]], [[SUM_NEXT_120]]
+; CHECK-NEXT: [[SUM_NEXT_121]] = add nsw i32 [[VAL_121]], [[SUM121]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_121:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 122
; CHECK-NEXT: [[ARRAYIDX_122:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_121]]
; CHECK-NEXT: [[VAL_122:%.*]] = load i32, ptr [[ARRAYIDX_122]], align 4
-; CHECK-NEXT: [[SUM_NEXT_122:%.*]] = add nsw i32 [[VAL_122]], [[SUM_NEXT_121]]
+; CHECK-NEXT: [[SUM_NEXT_122]] = add nsw i32 [[VAL_122]], [[SUM122]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_122:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 123
; CHECK-NEXT: [[ARRAYIDX_123:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_122]]
; CHECK-NEXT: [[VAL_123:%.*]] = load i32, ptr [[ARRAYIDX_123]], align 4
-; CHECK-NEXT: [[SUM_NEXT_123:%.*]] = add nsw i32 [[VAL_123]], [[SUM_NEXT_122]]
+; CHECK-NEXT: [[SUM_NEXT_123]] = add nsw i32 [[VAL_123]], [[SUM123]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_123:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 124
; CHECK-NEXT: [[ARRAYIDX_124:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_123]]
; CHECK-NEXT: [[VAL_124:%.*]] = load i32, ptr [[ARRAYIDX_124]], align 4
-; CHECK-NEXT: [[SUM_NEXT_124:%.*]] = add nsw i32 [[VAL_124]], [[SUM_NEXT_123]]
+; CHECK-NEXT: [[SUM_NEXT_124]] = add nsw i32 [[VAL_124]], [[SUM124]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_124:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 125
; CHECK-NEXT: [[ARRAYIDX_125:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_124]]
; CHECK-NEXT: [[VAL_125:%.*]] = load i32, ptr [[ARRAYIDX_125]], align 4
-; CHECK-NEXT: [[SUM_NEXT_125:%.*]] = add nsw i32 [[VAL_125]], [[SUM_NEXT_124]]
+; CHECK-NEXT: [[SUM_NEXT_125]] = add nsw i32 [[VAL_125]], [[SUM125]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_125:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 126
; CHECK-NEXT: [[ARRAYIDX_126:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_125]]
; CHECK-NEXT: [[VAL_126:%.*]] = load i32, ptr [[ARRAYIDX_126]], align 4
-; CHECK-NEXT: [[SUM_NEXT_126:%.*]] = add nsw i32 [[VAL_126]], [[SUM_NEXT_125]]
+; CHECK-NEXT: [[SUM_NEXT_126]] = add nsw i32 [[VAL_126]], [[SUM126]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_126:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 127
; CHECK-NEXT: [[ARRAYIDX_127:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_126]]
; CHECK-NEXT: [[VAL_127:%.*]] = load i32, ptr [[ARRAYIDX_127]], align 4
-; CHECK-NEXT: [[SUM_NEXT_127]] = add nsw i32 [[VAL_127]], [[SUM_NEXT_126]]
+; CHECK-NEXT: [[SUM_NEXT_127]] = add nsw i32 [[VAL_127]], [[SUM127]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_127]] = add nuw nsw i64 [[INDVARS_IV]], 128
; CHECK-NEXT: [[EXITCOND_NOT_127:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_127]], 8192
+; CHECK-NEXT: [[SUM_RED254:%.*]] = add nsw i32 [[SUM_NEXT]], [[SUM_NEXT_1]]
+; CHECK-NEXT: [[SUM_RED255:%.*]] = add nsw i32 [[SUM_RED254]], [[SUM_NEXT_2]]
+; CHECK-NEXT: [[SUM_RED256:%.*]] = add nsw i32 [[SUM_RED255]], [[SUM_NEXT_3]]
+; CHECK-NEXT: [[SUM_RED257:%.*]] = add nsw i32 [[SUM_RED256]], [[SUM_NEXT_4]]
+; CHECK-NEXT: [[SUM_RED258:%.*]] = add nsw i32 [[SUM_RED257]], [[SUM_NEXT_5]]
+; CHECK-NEXT: [[SUM_RED259:%.*]] = add nsw i32 [[SUM_RED258]], [[SUM_NEXT_6]]
+; CHECK-NEXT: [[SUM_RED260:%.*]] = add nsw i32 [[SUM_RED259]], [[SUM_NEXT_7]]
+; CHECK-NEXT: [[SUM_RED261:%.*]] = add nsw i32 [[SUM_RED260]], [[SUM_NEXT_8]]
+; CHECK-NEXT: [[SUM_RED262:%.*]] = add nsw i32 [[SUM_RED261]], [[SUM_NEXT_9]]
+; CHECK-NEXT: [[SUM_RED263:%.*]] = add nsw i32 [[SUM_RED262]], [[SUM_NEXT_10]]
+; CHECK-NEXT: [[SUM_RED264:%.*]] = add nsw i32 [[SUM_RED263]], [[SUM_NEXT_11]]
+; CHECK-NEXT: [[SUM_RED265:%.*]] = add nsw i32 [[SUM_RED264]], [[SUM_NEXT_12]]
+; CHECK-NEXT: [[SUM_RED266:%.*]] = add nsw i32 [[SUM_RED265]], [[SUM_NEXT_13]]
+; CHECK-NEXT: [[SUM_RED267:%.*]] = add nsw i32 [[SUM_RED266]], [[SUM_NEXT_14]]
+; CHECK-NEXT: [[SUM_RED268:%.*]] = add nsw i32 [[SUM_RED267]], [[SUM_NEXT_15]]
+; CHECK-NEXT: [[SUM_RED269:%.*]] = add nsw i32 [[SUM_RED268]], [[SUM_NEXT_16]]
+; CHECK-NEXT: [[SUM_RED270:%.*]] = add nsw i32 [[SUM_RED269]], [[SUM_NEXT_17]]
+; CHECK-NEXT: [[SUM_RED271:%.*]] = add nsw i32 [[SUM_RED270]], [[SUM_NEXT_18]]
+; CHECK-NEXT: [[SUM_RED272:%.*]] = add nsw i32 [[SUM_RED271]], [[SUM_NEXT_19]]
+; CHECK-NEXT: [[SUM_RED273:%.*]] = add nsw i32 [[SUM_RED272]], [[SUM_NEXT_20]]
+; CHECK-NEXT: [[SUM_RED274:%.*]] = add nsw i32 [[SUM_RED273]], [[SUM_NEXT_21]]
+; CHECK-NEXT: [[SUM_RED275:%.*]] = add nsw i32 [[SUM_RED274]], [[SUM_NEXT_22]]
+; CHECK-NEXT: [[SUM_RED276:%.*]] = add nsw i32 [[SUM_RED275]], [[SUM_NEXT_23]]
+; CHECK-NEXT: [[SUM_RED277:%.*]] = add nsw i32 [[SUM_RED276]], [[SUM_NEXT_24]]
+; CHECK-NEXT: [[SUM_RED278:%.*]] = add nsw i32 [[SUM_RED277]], [[SUM_NEXT_25]]
+; CHECK-NEXT: [[SUM_RED279:%.*]] = add nsw i32 [[SUM_RED278]], [[SUM_NEXT_26]]
+; CHECK-NEXT: [[SUM_RED280:%.*]] = add nsw i32 [[SUM_RED279]], [[SUM_NEXT_27]]
+; CHECK-NEXT: [[SUM_RED281:%.*]] = add nsw i32 [[SUM_RED280]], [[SUM_NEXT_28]]
+; CHECK-NEXT: [[SUM_RED282:%.*]] = add nsw i32 [[SUM_RED281]], [[SUM_NEXT_29]]
+; CHECK-NEXT: [[SUM_RED283:%.*]] = add nsw i32 [[SUM_RED282]], [[SUM_NEXT_30]]
+; CHECK-NEXT: [[SUM_RED284:%.*]] = add nsw i32 [[SUM_RED283]], [[SUM_NEXT_31]]
+; CHECK-NEXT: [[SUM_RED285:%.*]] = add nsw i32 [[SUM_RED284]], [[SUM_NEXT_32]]
+; CHECK-NEXT: [[SUM_RED286:%.*]] = add nsw i32 [[SUM_RED285]], [[SUM_NEXT_33]]
+; CHECK-NEXT: [[SUM_RED287:%.*]] = add nsw i32 [[SUM_RED286]], [[SUM_NEXT_34]]
+; CHECK-NEXT: [[SUM_RED288:%.*]] = add nsw i32 [[SUM_RED287]], [[SUM_NEXT_35]]
+; CHECK-NEXT: [[SUM_RED289:%.*]] = add nsw i32 [[SUM_RED288]], [[SUM_NEXT_36]]
+; CHECK-NEXT: [[SUM_RED290:%.*]] = add nsw i32 [[SUM_RED289]], [[SUM_NEXT_37]]
+; CHECK-NEXT: [[SUM_RED291:%.*]] = add nsw i32 [[SUM_RED290]], [[SUM_NEXT_38]]
+; CHECK-NEXT: [[SUM_RED292:%.*]] = add nsw i32 [[SUM_RED291]], [[SUM_NEXT_39]]
+; CHECK-NEXT: [[SUM_RED293:%.*]] = add nsw i32 [[SUM_RED292]], [[SUM_NEXT_40]]
+; CHECK-NEXT: [[SUM_RED294:%.*]] = add nsw i32 [[SUM_RED293]], [[SUM_NEXT_41]]
+; CHECK-NEXT: [[SUM_RED295:%.*]] = add nsw i32 [[SUM_RED294]], [[SUM_NEXT_42]]
+; CHECK-NEXT: [[SUM_RED296:%.*]] = add nsw i32 [[SUM_RED295]], [[SUM_NEXT_43]]
+; CHECK-NEXT: [[SUM_RED297:%.*]] = add nsw i32 [[SUM_RED296]], [[SUM_NEXT_44]]
+; CHECK-NEXT: [[SUM_RED298:%.*]] = add nsw i32 [[SUM_RED297]], [[SUM_NEXT_45]]
+; CHECK-NEXT: [[SUM_RED299:%.*]] = add nsw i32 [[SUM_RED298]], [[SUM_NEXT_46]]
+; CHECK-NEXT: [[SUM_RED300:%.*]] = add nsw i32 [[SUM_RED299]], [[SUM_NEXT_47]]
+; CHECK-NEXT: [[SUM_RED301:%.*]] = add nsw i32 [[SUM_RED300]], [[SUM_NEXT_48]]
+; CHECK-NEXT: [[SUM_RED302:%.*]] = add nsw i32 [[SUM_RED301]], [[SUM_NEXT_49]]
+; CHECK-NEXT: [[SUM_RED303:%.*]] = add nsw i32 [[SUM_RED302]], [[SUM_NEXT_50]]
+; CHECK-NEXT: [[SUM_RED304:%.*]] = add nsw i32 [[SUM_RED303]], [[SUM_NEXT_51]]
+; CHECK-NEXT: [[SUM_RED305:%.*]] = add nsw i32 [[SUM_RED304]], [[SUM_NEXT_52]]
+; CHECK-NEXT: [[SUM_RED306:%.*]] = add nsw i32 [[SUM_RED305]], [[SUM_NEXT_53]]
+; CHECK-NEXT: [[SUM_RED307:%.*]] = add nsw i32 [[SUM_RED306]], [[SUM_NEXT_54]]
+; CHECK-NEXT: [[SUM_RED308:%.*]] = add nsw i32 [[SUM_RED307]], [[SUM_NEXT_55]]
+; CHECK-NEXT: [[SUM_RED309:%.*]] = add nsw i32 [[SUM_RED308]], [[SUM_NEXT_56]]
+; CHECK-NEXT: [[SUM_RED310:%.*]] = add nsw i32 [[SUM_RED309]], [[SUM_NEXT_57]]
+; CHECK-NEXT: [[SUM_RED311:%.*]] = add nsw i32 [[SUM_RED310]], [[SUM_NEXT_58]]
+; CHECK-NEXT: [[SUM_RED312:%.*]] = add nsw i32 [[SUM_RED311]], [[SUM_NEXT_59]]
+; CHECK-NEXT: [[SUM_RED313:%.*]] = add nsw i32 [[SUM_RED312]], [[SUM_NEXT_60]]
+; CHECK-NEXT: [[SUM_RED314:%.*]] = add nsw i32 [[SUM_RED313]], [[SUM_NEXT_61]]
+; CHECK-NEXT: [[SUM_RED315:%.*]] = add nsw i32 [[SUM_RED314]], [[SUM_NEXT_62]]
+; CHECK-NEXT: [[SUM_RED316:%.*]] = add nsw i32 [[SUM_RED315]], [[SUM_NEXT_63]]
+; CHECK-NEXT: [[SUM_RED317:%.*]] = add nsw i32 [[SUM_RED316]], [[SUM_NEXT_64]]
+; CHECK-NEXT: [[SUM_RED318:%.*]] = add nsw i32 [[SUM_RED317]], [[SUM_NEXT_65]]
+; CHECK-NEXT: [[SUM_RED319:%.*]] = add nsw i32 [[SUM_RED318]], [[SUM_NEXT_66]]
+; CHECK-NEXT: [[SUM_RED320:%.*]] = add nsw i32 [[SUM_RED319]], [[SUM_NEXT_67]]
+; CHECK-NEXT: [[SUM_RED321:%.*]] = add nsw i32 [[SUM_RED320]], [[SUM_NEXT_68]]
+; CHECK-NEXT: [[SUM_RED322:%.*]] = add nsw i32 [[SUM_RED321]], [[SUM_NEXT_69]]
+; CHECK-NEXT: [[SUM_RED323:%.*]] = add nsw i32 [[SUM_RED322]], [[SUM_NEXT_70]]
+; CHECK-NEXT: [[SUM_RED324:%.*]] = add nsw i32 [[SUM_RED323]], [[SUM_NEXT_71]]
+; CHECK-NEXT: [[SUM_RED325:%.*]] = add nsw i32 [[SUM_RED324]], [[SUM_NEXT_72]]
+; CHECK-NEXT: [[SUM_RED326:%.*]] = add nsw i32 [[SUM_RED325]], [[SUM_NEXT_73]]
+; CHECK-NEXT: [[SUM_RED327:%.*]] = add nsw i32 [[SUM_RED326]], [[SUM_NEXT_74]]
+; CHECK-NEXT: [[SUM_RED328:%.*]] = add nsw i32 [[SUM_RED327]], [[SUM_NEXT_75]]
+; CHECK-NEXT: [[SUM_RED329:%.*]] = add nsw i32 [[SUM_RED328]], [[SUM_NEXT_76]]
+; CHECK-NEXT: [[SUM_RED330:%.*]] = add nsw i32 [[SUM_RED329]], [[SUM_NEXT_77]]
+; CHECK-NEXT: [[SUM_RED331:%.*]] = add nsw i32 [[SUM_RED330]], [[SUM_NEXT_78]]
+; CHECK-NEXT: [[SUM_RED332:%.*]] = add nsw i32 [[SUM_RED331]], [[SUM_NEXT_79]]
+; CHECK-NEXT: [[SUM_RED333:%.*]] = add nsw i32 [[SUM_RED332]], [[SUM_NEXT_80]]
+; CHECK-NEXT: [[SUM_RED334:%.*]] = add nsw i32 [[SUM_RED333]], [[SUM_NEXT_81]]
+; CHECK-NEXT: [[SUM_RED335:%.*]] = add nsw i32 [[SUM_RED334]], [[SUM_NEXT_82]]
+; CHECK-NEXT: [[SUM_RED336:%.*]] = add nsw i32 [[SUM_RED335]], [[SUM_NEXT_83]]
+; CHECK-NEXT: [[SUM_RED337:%.*]] = add nsw i32 [[SUM_RED336]], [[SUM_NEXT_84]]
+; CHECK-NEXT: [[SUM_RED338:%.*]] = add nsw i32 [[SUM_RED337]], [[SUM_NEXT_85]]
+; CHECK-NEXT: [[SUM_RED339:%.*]] = add nsw i32 [[SUM_RED338]], [[SUM_NEXT_86]]
+; CHECK-NEXT: [[SUM_RED340:%.*]] = add nsw i32 [[SUM_RED339]], [[SUM_NEXT_87]]
+; CHECK-NEXT: [[SUM_RED341:%.*]] = add nsw i32 [[SUM_RED340]], [[SUM_NEXT_88]]
+; CHECK-NEXT: [[SUM_RED342:%.*]] = add nsw i32 [[SUM_RED341]], [[SUM_NEXT_89]]
+; CHECK-NEXT: [[SUM_RED343:%.*]] = add nsw i32 [[SUM_RED342]], [[SUM_NEXT_90]]
+; CHECK-NEXT: [[SUM_RED344:%.*]] = add nsw i32 [[SUM_RED343]], [[SUM_NEXT_91]]
+; CHECK-NEXT: [[SUM_RED345:%.*]] = add nsw i32 [[SUM_RED344]], [[SUM_NEXT_92]]
+; CHECK-NEXT: [[SUM_RED346:%.*]] = add nsw i32 [[SUM_RED345]], [[SUM_NEXT_93]]
+; CHECK-NEXT: [[SUM_RED347:%.*]] = add nsw i32 [[SUM_RED346]], [[SUM_NEXT_94]]
+; CHECK-NEXT: [[SUM_RED348:%.*]] = add nsw i32 [[SUM_RED347]], [[SUM_NEXT_95]]
+; CHECK-NEXT: [[SUM_RED349:%.*]] = add nsw i32 [[SUM_RED348]], [[SUM_NEXT_96]]
+; CHECK-NEXT: [[SUM_RED350:%.*]] = add nsw i32 [[SUM_RED349]], [[SUM_NEXT_97]]
+; CHECK-NEXT: [[SUM_RED351:%.*]] = add nsw i32 [[SUM_RED350]], [[SUM_NEXT_98]]
+; CHECK-NEXT: [[SUM_RED352:%.*]] = add nsw i32 [[SUM_RED351]], [[SUM_NEXT_99]]
+; CHECK-NEXT: [[SUM_RED353:%.*]] = add nsw i32 [[SUM_RED352]], [[SUM_NEXT_100]]
+; CHECK-NEXT: [[SUM_RED354:%.*]] = add nsw i32 [[SUM_RED353]], [[SUM_NEXT_101]]
+; CHECK-NEXT: [[SUM_RED355:%.*]] = add nsw i32 [[SUM_RED354]], [[SUM_NEXT_102]]
+; CHECK-NEXT: [[SUM_RED356:%.*]] = add nsw i32 [[SUM_RED355]], [[SUM_NEXT_103]]
+; CHECK-NEXT: [[SUM_RED357:%.*]] = add nsw i32 [[SUM_RED356]], [[SUM_NEXT_104]]
+; CHECK-NEXT: [[SUM_RED358:%.*]] = add nsw i32 [[SUM_RED357]], [[SUM_NEXT_105]]
+; CHECK-NEXT: [[SUM_RED359:%.*]] = add nsw i32 [[SUM_RED358]], [[SUM_NEXT_106]]
+; CHECK-NEXT: [[SUM_RED360:%.*]] = add nsw i32 [[SUM_RED359]], [[SUM_NEXT_107]]
+; CHECK-NEXT: [[SUM_RED361:%.*]] = add nsw i32 [[SUM_RED360]], [[SUM_NEXT_108]]
+; CHECK-NEXT: [[SUM_RED362:%.*]] = add nsw i32 [[SUM_RED361]], [[SUM_NEXT_109]]
+; CHECK-NEXT: [[SUM_RED363:%.*]] = add nsw i32 [[SUM_RED362]], [[SUM_NEXT_110]]
+; CHECK-NEXT: [[SUM_RED364:%.*]] = add nsw i32 [[SUM_RED363]], [[SUM_NEXT_111]]
+; CHECK-NEXT: [[SUM_RED365:%.*]] = add nsw i32 [[SUM_RED364]], [[SUM_NEXT_112]]
+; CHECK-NEXT: [[SUM_RED366:%.*]] = add nsw i32 [[SUM_RED365]], [[SUM_NEXT_113]]
+; CHECK-NEXT: [[SUM_RED367:%.*]] = add nsw i32 [[SUM_RED366]], [[SUM_NEXT_114]]
+; CHECK-NEXT: [[SUM_RED368:%.*]] = add nsw i32 [[SUM_RED367]], [[SUM_NEXT_115]]
+; CHECK-NEXT: [[SUM_RED369:%.*]] = add nsw i32 [[SUM_RED368]], [[SUM_NEXT_116]]
+; CHECK-NEXT: [[SUM_RED370:%.*]] = add nsw i32 [[SUM_RED369]], [[SUM_NEXT_117]]
+; CHECK-NEXT: [[SUM_RED371:%.*]] = add nsw i32 [[SUM_RED370]], [[SUM_NEXT_118]]
+; CHECK-NEXT: [[SUM_RED372:%.*]] = add nsw i32 [[SUM_RED371]], [[SUM_NEXT_119]]
+; CHECK-NEXT: [[SUM_RED373:%.*]] = add nsw i32 [[SUM_RED372]], [[SUM_NEXT_120]]
+; CHECK-NEXT: [[SUM_RED374:%.*]] = add nsw i32 [[SUM_RED373]], [[SUM_NEXT_121]]
+; CHECK-NEXT: [[SUM_RED375:%.*]] = add nsw i32 [[SUM_RED374]], [[SUM_NEXT_122]]
+; CHECK-NEXT: [[SUM_RED376:%.*]] = add nsw i32 [[SUM_RED375]], [[SUM_NEXT_123]]
+; CHECK-NEXT: [[SUM_RED377:%.*]] = add nsw i32 [[SUM_RED376]], [[SUM_NEXT_124]]
+; CHECK-NEXT: [[SUM_RED378:%.*]] = add nsw i32 [[SUM_RED377]], [[SUM_NEXT_125]]
+; CHECK-NEXT: [[SUM_RED379:%.*]] = add nsw i32 [[SUM_RED378]], [[SUM_NEXT_126]]
+; CHECK-NEXT: [[SUM_RED380:%.*]] = add nsw i32 [[SUM_RED379]], [[SUM_NEXT_127]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_127]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_127]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_RED380]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]]
;
entry:
@@ -559,7 +813,14 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM2:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM3:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM4:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM7:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM8:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -613,7 +874,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48:%.*]] = mul i32 [[DUMMY47]], [[DUMMY47]]
; CHECK-NEXT: [[DUMMY49:%.*]] = mul i32 [[DUMMY48]], [[DUMMY48]]
; CHECK-NEXT: [[DUMMY50:%.*]] = mul i32 [[DUMMY49]], [[DUMMY49]]
-; CHECK-NEXT: [[SUM_NEXT:%.*]] = add nsw i32 [[DUMMY50]], [[SUM]]
+; CHECK-NEXT: [[SUM_NEXT]] = add nsw i32 [[DUMMY50]], [[SUM]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[VAL_1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
@@ -667,7 +928,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_1:%.*]] = mul i32 [[DUMMY47_1]], [[DUMMY47_1]]
; CHECK-NEXT: [[DUMMY49_1:%.*]] = mul i32 [[DUMMY48_1]], [[DUMMY48_1]]
; CHECK-NEXT: [[DUMMY50_1:%.*]] = mul i32 [[DUMMY49_1]], [[DUMMY49_1]]
-; CHECK-NEXT: [[SUM_NEXT_1:%.*]] = add nsw i32 [[DUMMY50_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_1]] = add nsw i32 [[DUMMY50_1]], [[SUM2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
@@ -721,7 +982,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_2:%.*]] = mul i32 [[DUMMY47_2]], [[DUMMY47_2]]
; CHECK-NEXT: [[DUMMY49_2:%.*]] = mul i32 [[DUMMY48_2]], [[DUMMY48_2]]
; CHECK-NEXT: [[DUMMY50_2:%.*]] = mul i32 [[DUMMY49_2]], [[DUMMY49_2]]
-; CHECK-NEXT: [[SUM_NEXT_2:%.*]] = add nsw i32 [[DUMMY50_2]], [[SUM_NEXT_1]]
+; CHECK-NEXT: [[SUM_NEXT_2]] = add nsw i32 [[DUMMY50_2]], [[SUM3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
@@ -775,7 +1036,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_3:%.*]] = mul i32 [[DUMMY47_3]], [[DUMMY47_3]]
; CHECK-NEXT: [[DUMMY49_3:%.*]] = mul i32 [[DUMMY48_3]], [[DUMMY48_3]]
; CHECK-NEXT: [[DUMMY50_3:%.*]] = mul i32 [[DUMMY49_3]], [[DUMMY49_3]]
-; CHECK-NEXT: [[SUM_NEXT_3:%.*]] = add nsw i32 [[DUMMY50_3]], [[SUM_NEXT_2]]
+; CHECK-NEXT: [[SUM_NEXT_3]] = add nsw i32 [[DUMMY50_3]], [[SUM4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
@@ -829,7 +1090,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_4:%.*]] = mul i32 [[DUMMY47_4]], [[DUMMY47_4]]
; CHECK-NEXT: [[DUMMY49_4:%.*]] = mul i32 [[DUMMY48_4]], [[DUMMY48_4]]
; CHECK-NEXT: [[DUMMY50_4:%.*]] = mul i32 [[DUMMY49_4]], [[DUMMY49_4]]
-; CHECK-NEXT: [[SUM_NEXT_4:%.*]] = add nsw i32 [[DUMMY50_4]], [[SUM_NEXT_3]]
+; CHECK-NEXT: [[SUM_NEXT_4]] = add nsw i32 [[DUMMY50_4]], [[SUM5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
@@ -883,7 +1144,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_5:%.*]] = mul i32 [[DUMMY47_5]], [[DUMMY47_5]]
; CHECK-NEXT: [[DUMMY49_5:%.*]] = mul i32 [[DUMMY48_5]], [[DUMMY48_5]]
; CHECK-NEXT: [[DUMMY50_5:%.*]] = mul i32 [[DUMMY49_5]], [[DUMMY49_5]]
-; CHECK-NEXT: [[SUM_NEXT_5:%.*]] = add nsw i32 [[DUMMY50_5]], [[SUM_NEXT_4]]
+; CHECK-NEXT: [[SUM_NEXT_5]] = add nsw i32 [[DUMMY50_5]], [[SUM6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
@@ -937,7 +1198,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_6:%.*]] = mul i32 [[DUMMY47_6]], [[DUMMY47_6]]
; CHECK-NEXT: [[DUMMY49_6:%.*]] = mul i32 [[DUMMY48_6]], [[DUMMY48_6]]
; CHECK-NEXT: [[DUMMY50_6:%.*]] = mul i32 [[DUMMY49_6]], [[DUMMY49_6]]
-; CHECK-NEXT: [[SUM_NEXT_6:%.*]] = add nsw i32 [[DUMMY50_6]], [[SUM_NEXT_5]]
+; CHECK-NEXT: [[SUM_NEXT_6]] = add nsw i32 [[DUMMY50_6]], [[SUM7]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
@@ -991,15 +1252,22 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-NEXT: [[DUMMY48_7:%.*]] = mul i32 [[DUMMY47_7]], [[DUMMY47_7]]
; CHECK-NEXT: [[DUMMY49_7:%.*]] = mul i32 [[DUMMY48_7]], [[DUMMY48_7]]
; CHECK-NEXT: [[DUMMY50_7:%.*]] = mul i32 [[DUMMY49_7]], [[DUMMY49_7]]
-; CHECK-NEXT: [[SUM_NEXT_7]] = add nsw i32 [[DUMMY50_7]], [[SUM_NEXT_6]]
+; CHECK-NEXT: [[SUM_NEXT_7]] = add nsw i32 [[DUMMY50_7]], [[SUM8]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: [[SUM_RED15:%.*]] = add nsw i32 [[SUM_NEXT]], [[SUM_NEXT_1]]
+; CHECK-NEXT: [[SUM_RED16:%.*]] = add nsw i32 [[SUM_RED15]], [[SUM_NEXT_2]]
+; CHECK-NEXT: [[SUM_RED17:%.*]] = add nsw i32 [[SUM_RED16]], [[SUM_NEXT_3]]
+; CHECK-NEXT: [[SUM_RED18:%.*]] = add nsw i32 [[SUM_RED17]], [[SUM_NEXT_4]]
+; CHECK-NEXT: [[SUM_RED19:%.*]] = add nsw i32 [[SUM_RED18]], [[SUM_NEXT_5]]
+; CHECK-NEXT: [[SUM_RED20:%.*]] = add nsw i32 [[SUM_RED19]], [[SUM_NEXT_6]]
+; CHECK-NEXT: [[SUM_RED21:%.*]] = add nsw i32 [[SUM_RED20]], [[SUM_NEXT_7]]
; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.cond.cleanup.unr-lcssa.loopexit:
-; CHECK-NEXT: [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_RED21]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_RED21]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_UNR_LCSSA]]
; CHECK: for.cond.cleanup.unr-lcssa:
; CHECK-NEXT: [[SUM_NEXT_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_NEXT_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index b44206a044c291..be9db67f3c9aa4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -75,31 +75,37 @@ define i3 @test(ptr %a, i3 %n) {
; UNROLL-4-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-4: for.body:
; UNROLL-4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[SUM_024:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_1:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[SUM_025:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_2:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[SUM_026:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; UNROLL-4-NEXT: [[TMP2:%.*]] = load i3, ptr [[ARRAYIDX]], align 1
-; UNROLL-4-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
+; UNROLL-4-NEXT: [[ADD]] = add nsw i3 [[TMP2]], [[SUM_02]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-4-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-4-NEXT: [[TMP3:%.*]] = load i3, ptr [[ARRAYIDX_1]], align 1
-; UNROLL-4-NEXT: [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
+; UNROLL-4-NEXT: [[ADD_1]] = add nsw i3 [[TMP3]], [[SUM_024]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; UNROLL-4-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
; UNROLL-4-NEXT: [[TMP4:%.*]] = load i3, ptr [[ARRAYIDX_2]], align 1
-; UNROLL-4-NEXT: [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
+; UNROLL-4-NEXT: [[ADD_2]] = add nsw i3 [[TMP4]], [[SUM_025]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; UNROLL-4-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
; UNROLL-4-NEXT: [[TMP5:%.*]] = load i3, ptr [[ARRAYIDX_3]], align 1
-; UNROLL-4-NEXT: [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
+; UNROLL-4-NEXT: [[ADD_3]] = add nsw i3 [[TMP5]], [[SUM_026]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
; UNROLL-4-NEXT: [[NITER_NEXT_3]] = add i3 [[NITER]], -4
; UNROLL-4-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
+; UNROLL-4-NEXT: [[SUM_02_RED9:%.*]] = add nsw i3 [[ADD]], [[ADD_1]]
+; UNROLL-4-NEXT: [[SUM_02_RED10:%.*]] = add nsw i3 [[SUM_02_RED9]], [[ADD_2]]
+; UNROLL-4-NEXT: [[SUM_02_RED11:%.*]] = add nsw i3 [[SUM_02_RED10]], [[ADD_3]]
; UNROLL-4-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-4: for.end.loopexit.unr-lcssa.loopexit:
-; UNROLL-4-NEXT: [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[SUM_02_RED11]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[SUM_02_RED11]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
; UNROLL-4: for.end.loopexit.unr-lcssa:
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index fea9df610c3e82..ab38fa84efc7f7 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -16,11 +16,14 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT: [[C_010_RED7:%.*]] = add nsw i32 [[ADD:%.*]], [[ADD_1:%.*]]
+; CHECK-NEXT: [[C_010_RED8:%.*]] = add nsw i32 [[C_010_RED7]], [[ADD_2:%.*]]
+; CHECK-NEXT: [[C_010_RED9:%.*]] = add nsw i32 [[C_010_RED8]], [[ADD_3:%.*]]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
; CHECK: for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[C_010_RED9]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[C_010_RED9]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER:%.*]]
; CHECK: for.body.epil.preheader:
@@ -64,35 +67,38 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
; CHECK-NEXT: ret i32 [[C_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C_0102:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_1]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C_0103:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C_0104:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[C_010]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
+; CHECK-NEXT: [[ADD_1]] = add nsw i32 [[MUL_1]], [[C_0102]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
+; CHECK-NEXT: [[ADD_2]] = add nsw i32 [[MUL_2]], [[C_0103]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP14]], [[TMP13]]
-; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
+; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[MUL_3]], [[C_0104]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
diff --git a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
index 2d4dd76a6cab37..a1ae26ece595b7 100644
--- a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=loop-unroll -unroll-count=2 < %s | FileCheck %s
+; RUN: opt -S -passes=loop-unroll -unroll-count=2 -unroll-simplify-reductions=true < %s | FileCheck %s
; The loops below are variations of:
; double sum = 0;
@@ -14,19 +14,21 @@ define double @p1(ptr %A) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM1:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
-; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: [[SUM_NEXT]] = fadd fast double [[A_VAL]], [[SUM]]
; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM1]]
; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: [[SUM_RED:%.*]] = fadd fast double [[SUM_NEXT]], [[SUM_NEXT_1]]
; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_RED]], [[LOOP]] ]
; CHECK-NEXT: ret double [[SUM_LCSSA]]
;
entry:
@@ -54,19 +56,21 @@ define double @p2(ptr %A, double %acc) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM1:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
-; CHECK-NEXT: [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT: [[SUM_NEXT]] = fadd fast double [[A_VAL]], [[SUM]]
; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
; CHECK-NEXT: [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM1]]
; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: [[SUM_RED:%.*]] = fadd fast double [[SUM_NEXT]], [[SUM_NEXT_1]]
; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi double [ [[SUM_RED]], [[LOOP]] ]
; CHECK-NEXT: ret double [[SUM_LCSSA]]
;
entry:
@@ -94,19 +98,21 @@ define i64 @p3(ptr %A) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[AND1:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
; CHECK-NEXT: [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
-; CHECK-NEXT: [[AND_NEXT:%.*]] = and i64 [[A_VAL]], [[AND]]
+; CHECK-NEXT: [[AND_NEXT]] = and i64 [[A_VAL]], [[AND]]
; CHECK-NEXT: [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
; CHECK-NEXT: [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT: [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND_NEXT]]
+; CHECK-NEXT: [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND1]]
; CHECK-NEXT: [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT: [[AND_RED:%.*]] = and i64 [[AND_NEXT]], [[AND_NEXT_1]]
; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND_RED]], [[LOOP]] ]
; CHECK-NEXT: ret i64 [[AND_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
index 5fe267d62f9333..4b83483f214da1 100644
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -18,129 +18,182 @@ define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ]
-; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1121:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_111:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1122:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_218:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1123:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_3:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1124:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_4:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1125:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_5:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1126:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_6:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[SUM_1127:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[SUM_11]]
+; CHECK-NEXT: [[ADD]] = add i32 [[TMP0]], [[SUM_11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_NEG:%.*]] = xor i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_NEG]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
+; CHECK-NEXT: [[ADD_111]] = add i32 [[TMP1]], [[SUM_1121]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_112_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_112_NEG]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
+; CHECK-NEXT: [[ADD_218]] = add i32 [[TMP2]], [[SUM_1122]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_219_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_219_NEG]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
+; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP3]], [[SUM_1123]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_3_NEG]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
+; CHECK-NEXT: [[ADD_4]] = add i32 [[TMP4]], [[SUM_1124]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_4_NEG]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
+; CHECK-NEXT: [[ADD_5]] = add i32 [[TMP5]], [[SUM_1125]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_5_NEG]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
+; CHECK-NEXT: [[ADD_6]] = add i32 [[TMP6]], [[SUM_1126]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_6_NEG]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
+; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[SUM_1127]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1_PREHEADER:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: for.body4.1.preheader:
+; CHECK-NEXT: [[SUM_11_RED34:%.*]] = add i32 [[ADD]], [[ADD_111]]
+; CHECK-NEXT: [[SUM_11_RED35:%.*]] = add i32 [[SUM_11_RED34]], [[ADD_218]]
+; CHECK-NEXT: [[SUM_11_RED36:%.*]] = add i32 [[SUM_11_RED35]], [[ADD_3]]
+; CHECK-NEXT: [[SUM_11_RED37:%.*]] = add i32 [[SUM_11_RED36]], [[ADD_4]]
+; CHECK-NEXT: [[SUM_11_RED38:%.*]] = add i32 [[SUM_11_RED37]], [[ADD_5]]
+; CHECK-NEXT: [[SUM_11_RED39:%.*]] = add i32 [[SUM_11_RED38]], [[ADD_6]]
+; CHECK-NEXT: [[SUM_11_RED40:%.*]] = add i32 [[SUM_11_RED39]], [[ADD_7]]
+; CHECK-NEXT: br label [[FOR_BODY4_1:%.*]]
; CHECK: for.body4.1:
-; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
-; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
+; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[SUM_11_RED40]], [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_148:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_1:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_149:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_2:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_150:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_3:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_151:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_4:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_152:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_5:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_153:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_6:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[SUM_11_154:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ]
; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1:%.*]] = shl i32 [[TMP8]], 1
+; CHECK-NEXT: [[ADD_1]] = add i32 [[MUL_1]], [[SUM_11_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_NEG:%.*]] = xor i64 [[INDVARS_IV_1]], -1
; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_NEG]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1
+; CHECK-NEXT: [[ADD_1_1]] = add i32 [[MUL_1_1]], [[SUM_11_148]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_1_NEG]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1
+; CHECK-NEXT: [[ADD_1_2]] = add i32 [[MUL_1_2]], [[SUM_11_149]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_2_NEG]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1
+; CHECK-NEXT: [[ADD_1_3]] = add i32 [[MUL_1_3]], [[SUM_11_150]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_3_NEG]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1
+; CHECK-NEXT: [[ADD_1_4]] = add i32 [[MUL_1_4]], [[SUM_11_151]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_4_NEG]]
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1
+; CHECK-NEXT: [[ADD_1_5]] = add i32 [[MUL_1_5]], [[SUM_11_152]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_5_NEG]]
-; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1
+; CHECK-NEXT: [[ADD_1_6]] = add i32 [[MUL_1_6]], [[SUM_11_153]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_6_NEG]]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
-; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_1_7:%.*]] = shl i32 [[TMP15]], 1
+; CHECK-NEXT: [[ADD_1_7]] = add i32 [[MUL_1_7]], [[SUM_11_154]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2_PREHEADER:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK: for.body4.2.preheader:
+; CHECK-NEXT: [[SUM_11_1_RED61:%.*]] = add i32 [[ADD_1]], [[ADD_1_1]]
+; CHECK-NEXT: [[SUM_11_1_RED62:%.*]] = add i32 [[SUM_11_1_RED61]], [[ADD_1_2]]
+; CHECK-NEXT: [[SUM_11_1_RED63:%.*]] = add i32 [[SUM_11_1_RED62]], [[ADD_1_3]]
+; CHECK-NEXT: [[SUM_11_1_RED64:%.*]] = add i32 [[SUM_11_1_RED63]], [[ADD_1_4]]
+; CHECK-NEXT: [[SUM_11_1_RED65:%.*]] = add i32 [[SUM_11_1_RED64]], [[ADD_1_5]]
+; CHECK-NEXT: [[SUM_11_1_RED66:%.*]] = add i32 [[SUM_11_1_RED65]], [[ADD_1_6]]
+; CHECK-NEXT: [[SUM_11_1_RED67:%.*]] = add i32 [[SUM_11_1_RED66]], [[ADD_1_7]]
+; CHECK-NEXT: br label [[FOR_BODY4_2:%.*]]
; CHECK: for.body4.2:
-; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
-; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[SUM_11_1_RED67]], [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_275:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_1:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_276:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_2:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_277:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_3:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_278:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_4:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_279:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_5:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_280:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_6:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT: [[SUM_11_281:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ]
; CHECK-NEXT: [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP24]], 3
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP16]], 3
+; CHECK-NEXT: [[ADD_2]] = add i32 [[MUL_2]], [[SUM_11_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_NEG:%.*]] = xor i64 [[INDVARS_IV_2]], -1
; CHECK-NEXT: [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_NEG]]
-; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3
-; CHECK-NEXT: [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3
+; CHECK-NEXT: [[ADD_2_1]] = add i32 [[MUL_2_1]], [[SUM_11_275]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_1_NEG]]
-; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3
-; CHECK-NEXT: [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3
+; CHECK-NEXT: [[ADD_2_2]] = add i32 [[MUL_2_2]], [[SUM_11_276]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_2_NEG]]
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3
-; CHECK-NEXT: [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3
+; CHECK-NEXT: [[ADD_2_3]] = add i32 [[MUL_2_3]], [[SUM_11_277]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_3_NEG]]
-; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3
-; CHECK-NEXT: [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3
+; CHECK-NEXT: [[ADD_2_4]] = add i32 [[MUL_2_4]], [[SUM_11_278]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_4_NEG]]
-; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3
-; CHECK-NEXT: [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3
+; CHECK-NEXT: [[ADD_2_5]] = add i32 [[MUL_2_5]], [[SUM_11_279]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_5_NEG]]
-; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3
-; CHECK-NEXT: [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3
+; CHECK-NEXT: [[ADD_2_6]] = add i32 [[MUL_2_6]], [[SUM_11_280]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_6_NEG]]
-; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
-; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3
+; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[SUM_11_281]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
; CHECK: for.inc5.2:
-; CHECK-NEXT: ret i32 [[ADD_2_7]]
+; CHECK-NEXT: [[SUM_11_2_RED88:%.*]] = add i32 [[ADD_2]], [[ADD_2_1]]
+; CHECK-NEXT: [[SUM_11_2_RED89:%.*]] = add i32 [[SUM_11_2_RED88]], [[ADD_2_2]]
+; CHECK-NEXT: [[SUM_11_2_RED90:%.*]] = add i32 [[SUM_11_2_RED89]], [[ADD_2_3]]
+; CHECK-NEXT: [[SUM_11_2_RED91:%.*]] = add i32 [[SUM_11_2_RED90]], [[ADD_2_4]]
+; CHECK-NEXT: [[SUM_11_2_RED92:%.*]] = add i32 [[SUM_11_2_RED91]], [[ADD_2_5]]
+; CHECK-NEXT: [[SUM_11_2_RED93:%.*]] = add i32 [[SUM_11_2_RED92]], [[ADD_2_6]]
+; CHECK-NEXT: [[SUM_11_2_RED94:%.*]] = add i32 [[SUM_11_2_RED93]], [[ADD_2_7]]
+; CHECK-NEXT: ret i32 [[SUM_11_2_RED94]]
;
entry:
call void @populate(ptr noundef @ARR)
>From b259694086a5c79991ee1a0bcf6b6562e49c4fbf Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rj.bcjesus at gmail.com>
Date: Tue, 12 Mar 2024 09:55:14 +0000
Subject: [PATCH 3/3] [LoopUnroll] Fix some formatting issues
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 31 +++++++++++-------------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b14d05d642e275..e30547d8ee27b4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -84,11 +84,6 @@ STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
"latch (completely or otherwise)");
-static cl::opt<bool>
-UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
- cl::Hidden, cl::desc("Try to simplify reductions "
- "after unrolling a loop."));
-
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
@@ -114,6 +109,11 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
#endif
);
+static cl::opt<bool>
+UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
+ cl::Hidden, cl::desc("Try to simplify reductions "
+ "after unrolling a loop."));
+
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
@@ -265,8 +265,8 @@ static bool trySimplifyReductions(Instruction &I) {
// Attempt to construct a list of instructions that are chained together
// (i.e. that perform a reduction).
SmallVector<BinaryOperator *, 16> Ops;
- for (Instruction *Cur = PN, *Next = nullptr; /* true */; Cur = Next,
- Next = nullptr) {
+ for (Instruction *Cur = PN, *Next = nullptr; /* true */;
+ Cur = Next, Next = nullptr) {
// Try to find the next element in the reduction chain.
for (auto *U : Cur->users()) {
auto *Candidate = dyn_cast<Instruction>(U);
@@ -298,11 +298,8 @@ static bool trySimplifyReductions(Instruction &I) {
if (Ops.size() < 2)
return false;
- LLVM_DEBUG(
- dbgs() << "Found candidate reduction: " << I << "\n";
- for (auto const *Op : Ops)
- dbgs() << " | " << *Op << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Candidate reduction of length " << Ops.size()
+ << " found at " << I << ".\n");
// Ensure all instructions perform the same operation and that the operation
// is associative and commutative so that we can break the chain apart and
@@ -407,9 +404,9 @@ static bool trySimplifyReductions(Instruction &I) {
// Helper function to create a new binary op.
// Note: We copy the flags from Ops[0]. Could this be too permissive?
auto CreateBinOp = [&](Value *V1, Value *V2) {
- auto Name = PN->getName()+".red";
- return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0],
- Name, &BB->back());
+ auto Name = PN->getName() + ".red";
+ return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0], Name,
+ &BB->back());
};
// Compute the partial sums of the Ops:
@@ -420,7 +417,7 @@ static bool trySimplifyReductions(Instruction &I) {
// so if we compute SOps in order (i.e. from 0 to N) we can reuse partial
// results.
SmallVector<Value *, 16> SOps(N+1);
- SOps[0] = nullptr; // alternatively we could use NeutralElem
+ SOps[0] = nullptr; // alternatively we could use NeutralElem
SOps[1] = Ops.front();
for (unsigned k = 2; k <= N; k++)
SOps[k] = CreateBinOp(SOps[k-1], Ops[k-1]);
@@ -433,7 +430,7 @@ static bool trySimplifyReductions(Instruction &I) {
// so if we compute SPhis in reverse (i.e. from N down to 0) we can reuse the
// partial sums computed thus far.
SmallVector<Value *, 16> SPhis(N+1);
- SPhis[N] = nullptr; // alternatively we could use NeutralElem
+ SPhis[N] = nullptr; // alternatively we could use NeutralElem
SPhis[N-1] = Phis.back();
for (signed k = N-2; k >= 0; k--)
SPhis[k] = CreateBinOp(SPhis[k+1], Phis[k]);
More information about the llvm-commits mailing list