[llvm] [LoopUnroll] Simplify reduction operations after a loop unroll (PR #84805)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 12 02:54:24 PDT 2024


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/84805

>From f786d8486fe27cae3e5537ddfb2cf7f8196483d7 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at ed.ac.uk>
Date: Mon, 11 Mar 2024 17:39:20 +0000
Subject: [PATCH 1/3] [LoopUnroll][NFC] Add tests for reduction deinterleaving

---
 llvm/test/CodeGen/AArch64/polybench-3mm.ll    |  53 ++++
 .../LoopUnroll/simplify-reductions.ll         | 295 ++++++++++++++++++
 2 files changed, 348 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/polybench-3mm.ll
 create mode 100644 llvm/test/Transforms/LoopUnroll/simplify-reductions.ll

diff --git a/llvm/test/CodeGen/AArch64/polybench-3mm.ll b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
new file mode 100644
index 00000000000000..034d7f44a95f41
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: opt -passes=loop-unroll,instcombine -unroll-count=2 %s | llc --mattr=,+neon | FileCheck %s
+
+target triple = "aarch64"
+
+; This is a reduced example adapted from the Polybench 3MM kernel.
+; We are doing something similar to:
+;   double dot = 0.0;
+;   for (long k = 0; k < 1000; k++)
+;     dot += A[k] * B[k*nb];
+;   return dot;
+
+define double @test(ptr %A, ptr %B, i64 %nb) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d0, #0000000000000000
+; CHECK-NEXT:    lsl x8, x2, #4
+; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    add x10, x0, x9, lsl #3
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    add x9, x9, #2
+; CHECK-NEXT:    cmp x9, #1000
+; CHECK-NEXT:    ldp d2, d3, [x10]
+; CHECK-NEXT:    fmadd d0, d1, d2, d0
+; CHECK-NEXT:    ldr d1, [x1, x2, lsl #3]
+; CHECK-NEXT:    add x1, x1, x8
+; CHECK-NEXT:    fmadd d0, d1, d3, d0
+; CHECK-NEXT:    b.ne .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+loop:
+  %k = phi i64 [ %k.next, %loop ], [ 0, %entry ]
+  %dot = phi double [ %dot.next, %loop ], [ 0.000000e+00, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %k
+  %A.val = load double, ptr %A.gep, align 8
+  %B.idx = mul nsw i64 %k, %nb
+  %B.gep = getelementptr inbounds double, ptr %B, i64 %B.idx
+  %B.val = load double, ptr %B.gep, align 8
+  %fmul = fmul fast double %B.val, %A.val
+  %dot.next = fadd fast double %fmul, %dot
+  %k.next = add nuw nsw i64 %k, 1
+  %cmp = icmp eq i64 %k.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %dot.next.lcssa = phi double [ %dot.next, %loop ]
+  ret double %dot.next.lcssa
+}
diff --git a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
new file mode 100644
index 00000000000000..2d4dd76a6cab37
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=loop-unroll -unroll-count=2 < %s | FileCheck %s
+
+; The loops below are variations of:
+;   double sum = 0;
+;   for(long i = 0; i < 1000; i++)
+;     sum += A[i];
+;   return sum;
+
+; Positive test, simple case.
+define double @p1(ptr %A) {
+; CHECK-LABEL: @p1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret double [[SUM_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+  %A.val = load double, ptr %A.gep, align 8
+  %sum.next = fadd fast double %A.val, %sum
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %sum.lcssa = phi double [ %sum.next, %loop ]
+  ret double %sum.lcssa
+}
+
+; Positive test, non-zero starting sum.
+define double @p2(ptr %A, double %acc) {
+; CHECK-LABEL: @p2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret double [[SUM_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %sum = phi double [ %sum.next, %loop ], [ %acc, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+  %A.val = load double, ptr %A.gep, align 8
+  %sum.next = fadd fast double %A.val, %sum
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %sum.lcssa = phi double [ %sum.next, %loop ]
+  ret double %sum.lcssa
+}
+
+; Positive test, non-floating-point type.
+define i64 @p3(ptr %A) {
+; CHECK-LABEL: @p3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[AND_NEXT:%.*]] = and i64 [[A_VAL]], [[AND]]
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND_NEXT]]
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[AND_LCSSA:%.*]] = phi i64 [ [[AND_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret i64 [[AND_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %and = phi i64 [ %and.next, %loop ], [ -1, %entry ]
+  %A.gep = getelementptr inbounds i64, ptr %A, i64 %i
+  %A.val = load i64, ptr %A.gep, align 8
+  %and.next = and i64 %A.val, %and
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %and.lcssa = phi i64 [ %and.next, %loop ]
+  ret i64 %and.lcssa
+}
+
+; Negative test: Sum is used in the loop, which prevents breaking the
+; reduction.
+define double @n1(ptr %A) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    store double [[SUM_NEXT]], ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    store double [[SUM_NEXT_1]], ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret double [[SUM_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+  %A.val = load double, ptr %A.gep, align 8
+  %sum.next = fadd fast double %A.val, %sum
+  store double %sum.next, ptr %A.gep
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %sum.lcssa = phi double [ %sum.next, %loop ]
+  ret double %sum.lcssa
+}
+
+; Negative test: Reduction op is not associative or commutative.
+define double @n2(ptr %A) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret double [[SUM_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %sum = phi double [ %sum.next, %loop ], [ 0.000000e+00, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+  %A.val = load double, ptr %A.gep, align 8
+  %sum.next = fadd double %A.val, %sum
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %sum.lcssa = phi double [ %sum.next, %loop ]
+  ret double %sum.lcssa
+}
+
+; Negative test: Reduction op is unsupported.
+define double @n3(ptr %A) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PROD:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[PROD_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[PROD_NEXT:%.*]] = fmul fast double [[A_VAL]], [[PROD]]
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[PROD_NEXT_1]] = fmul fast double [[A_VAL_1]], [[PROD_NEXT]]
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[PROD_LCSSA:%.*]] = phi double [ [[PROD_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret double [[PROD_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %prod = phi double [ %prod.next, %loop ], [ 1.000000e+00, %entry ]
+  %A.gep = getelementptr inbounds double, ptr %A, i64 %i
+  %A.val = load double, ptr %A.gep, align 8
+  %prod.next = fmul fast double %A.val, %prod
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %prod.lcssa = phi double [ %prod.next, %loop ]
+  ret double %prod.lcssa
+}
+
+; Negative test: Chain of different operations.
+define i64 @n4(ptr %A) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RED:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RED_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
+; CHECK-NEXT:    [[RED_TEMP:%.*]] = add i64 [[A_VAL]], [[RED]]
+; CHECK-NEXT:    [[RED_NEXT:%.*]] = and i64 [[RED_TEMP]], 7
+; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
+; CHECK-NEXT:    [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
+; CHECK-NEXT:    [[RED_TEMP_1:%.*]] = add i64 [[A_VAL_1]], [[RED_NEXT]]
+; CHECK-NEXT:    [[RED_NEXT_1]] = and i64 [[RED_TEMP_1]], 7
+; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RED_LCSSA:%.*]] = phi i64 [ [[RED_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    ret i64 [[RED_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+  %red = phi i64 [ %red.next, %loop ], [ 0, %entry ]
+  %A.gep = getelementptr inbounds i64, ptr %A, i64 %i
+  %A.val = load i64, ptr %A.gep, align 8
+  %red.temp = add i64 %A.val, %red
+  %red.next = and i64 %red.temp, 7
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp = icmp eq i64 %i.next, 1000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %red.lcssa = phi i64 [ %red.next, %loop ]
+  ret i64 %red.lcssa
+}

>From 1ec52ec6c2e1e4edefd0b2a02fad005655c1303e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at ed.ac.uk>
Date: Mon, 11 Mar 2024 15:56:08 +0000
Subject: [PATCH 2/3] [LoopUnroll] Simplify reduction operations after a loop
 unroll

Try to simplify reductions (e.g. chains of floating-point adds) into
independent operations after unrolling a loop. Such chains are a very
common pattern in unrolled loops that compute dot products (for example),
and breaking them apart can help with vectorisation.
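
As a rough source-level sketch of the intended effect (illustrative names
only, n assumed even; the transform itself runs on the unrolled IR and only
fires when the reduction operation is associative and commutative, e.g. a
fast-math fadd):

  /* After unroll-by-2, without this patch: one serial chain of adds. */
  double dot_before(const double *A, const double *B, long n) {
    double sum = 0.0;
    for (long i = 0; i < n; i += 2) {
      sum += A[i] * B[i];          /* each add depends on the previous one */
      sum += A[i + 1] * B[i + 1];
    }
    return sum;
  }

  /* With this patch: independent partial sums, combined once at the end. */
  double dot_after(const double *A, const double *B, long n) {
    double sum0 = 0.0, sum1 = 0.0;
    for (long i = 0; i < n; i += 2) {
      sum0 += A[i] * B[i];         /* the two chains no longer depend    */
      sum1 += A[i + 1] * B[i + 1]; /* on each other within an iteration  */
    }
    return sum0 + sum1;            /* final reduction outside the loop   */
  }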
---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      | 267 +++++++++
 llvm/test/CodeGen/AArch64/polybench-3mm.ll    |  14 +-
 .../LoopUnroll/AArch64/falkor-prefetch.ll     |  20 +
 .../LoopUnroll/ARM/instr-size-costs.ll        |  20 +-
 llvm/test/Transforms/LoopUnroll/X86/znver3.ll | 550 +++++++++++++-----
 .../Transforms/LoopUnroll/runtime-loop5.ll    |  20 +-
 .../LoopUnroll/runtime-unroll-remainder.ll    |  20 +-
 .../LoopUnroll/simplify-reductions.ll         |  32 +-
 .../PhaseOrdering/SystemZ/sub-xor.ll          | 163 ++++--
 9 files changed, 869 insertions(+), 237 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 6f0d000815726e..b14d05d642e275 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -84,6 +84,11 @@ STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
 STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
                                "latch (completely or otherwise)");
 
+static cl::opt<bool>
+UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
+                         cl::Hidden, cl::desc("Try to simplify reductions "
+                                              "after unrolling a loop."));
+
 static cl::opt<bool>
 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
                     cl::desc("Allow runtime unrolled loops to be unrolled "
@@ -209,6 +214,258 @@ static bool isEpilogProfitable(Loop *L) {
   return false;
 }
 
+/// This function tries to break apart simple reduction loops like the one
+/// below:
+///
+/// loop:
+///   PN = PHI [SUM2, loop], ...
+///   X = ...
+///   SUM1 = ADD (X, PN)
+///   Y = ...
+///   SUM2 = ADD (Y, SUM1)
+///   br loop
+///
+/// into independent sums of the form:
+///
+/// loop:
+///   PN1 = PHI [SUM1, loop], ...
+///   PN2 = PHI [SUM2, loop], ...
+///   X = ...
+///   SUM1 = ADD (X, PN1)
+///   Y = ...
+///   SUM2 = ADD (Y, PN2)
+///   <Reductions>
+///   br loop
+///
+/// where <Reductions> are new instructions inserted to compute the final
+/// values of the reduction from the partial sums we introduced, in this case:
+///
+/// <Reductions> =
+///   PN.red = ADD (PN1, PN2)
+///   SUM1.red = ADD (SUM1, PN2)
+///   SUM2.red = ADD (SUM1, SUM2)
+///
+/// In practice, in most cases only one or two of the reduced values are
+/// required outside the loop, so most of the reduction instructions do not
+/// need to be added into the loop. Moreover, these instructions can be sunk
+/// out of the loop, which happens in later passes.
+///
+/// This is a very common pattern in unrolled loops that compute dot products
+/// (for example), and breaking apart the reduction chains can help greatly
+/// with vectorisation.
+static bool trySimplifyReductions(Instruction &I) {
+  // Check if I is a PHINode (potentially the start of a reduction chain).
+  // Note: For simplicity we only consider loops that consist of a single
+  // basic block that branches to itself.
+  BasicBlock *BB = I.getParent();
+  PHINode *PN = dyn_cast<PHINode>(&I);
+  if (!PN || PN->getBasicBlockIndex(BB) == -1)
+    return false;
+
+  // Attempt to construct a list of instructions that are chained together
+  // (i.e. that perform a reduction).
+  SmallVector<BinaryOperator *, 16> Ops;
+  for (Instruction *Cur = PN, *Next = nullptr; /* true */; Cur = Next,
+                                                           Next = nullptr) {
+    // Try to find the next element in the reduction chain.
+    for (auto *U : Cur->users()) {
+      auto *Candidate = dyn_cast<Instruction>(U);
+      if (Candidate && Candidate->getParent() == BB) {
+        // If we've already found a candidate element for the chain and we
+        // find *another* candidate, we bail out: this means the intermediate
+        // values of the reduction are needed within the loop, so there is no
+        // point in breaking the reduction apart.
+        if (Next)
+          return false;
+        Next = Candidate;
+      }
+    }
+    // If we've reached the start, i.e. the next element in the chain would be
+    // the PN we started with, we are done.
+    if (Next == PN)
+      break;
+    // Else, check if we found a candidate at all and, if so, whether it is
+    // a binary operator.
+    if (!Next || !isa<BinaryOperator>(Next))
+      return false;
+    // If everything checks out, add the new element to the chain.
+    Ops.push_back(cast<BinaryOperator>(Next));
+  }
+
+  // Ensure the reduction comprises at least two instructions; otherwise this
+  // is a trivial reduction of a single element that doesn't need to be
+  // simplified.
+  if (Ops.size() < 2)
+    return false;
+
+  LLVM_DEBUG({
+    dbgs() << "Found candidate reduction: " << I << "\n";
+    for (auto const *Op : Ops)
+      dbgs() << "                         | " << *Op << "\n";
+  });
+
+  // Ensure all instructions perform the same operation and that the operation
+  // is associative and commutative so that we can break the chain apart and
+  // reassociate the Ops.
+  Instruction::BinaryOps const Opcode = Ops[0]->getOpcode();
+  for (auto const *Op : Ops)
+    if (Op->getOpcode() != Opcode || !Op->isAssociative() ||
+        !Op->isCommutative())
+      return false;
+
+  // Define the neutral element of the reduction or bail out if we don't have
+  // one defined.
+  // TODO: This could be generalised to other operations (e.g. MULs).
+  Value *NeutralElem = nullptr;
+  switch (Opcode) {
+  case Instruction::BinaryOps::Add:
+  case Instruction::BinaryOps::Or:
+  case Instruction::BinaryOps::Xor:
+  case Instruction::BinaryOps::FAdd:
+    NeutralElem = Constant::getNullValue(PN->getType());
+    break;
+  case Instruction::BinaryOps::And:
+    NeutralElem = Constant::getAllOnesValue(PN->getType());
+    break;
+  case Instruction::BinaryOps::Mul:
+  case Instruction::BinaryOps::FMul:
+  default:
+    return false;
+  }
+  assert(NeutralElem && "Neutral element of reduction undefined.");
+
+  // --------------------------------------------------------------------- //
+  // At this point Ops is a list of chained binary operations performing a //
+  // reduction that we know we can break apart.                            //
+  // --------------------------------------------------------------------- //
+
+  // For shorthand, let N be the length of the chain.
+  unsigned const N = Ops.size();
+  LLVM_DEBUG(dbgs() << "Simplifying reduction of length " << N << ".\n");
+
+  // Create new phi nodes for all but the first element in the chain.
+  SmallVector<PHINode *, 16> Phis{PN};
+  for (unsigned i = 1; i < N; i++) {
+    PHINode *NewPN = PHINode::Create(PN->getType(), PN->getNumIncomingValues(),
+                                     PN->getName());
+    // Copy incoming blocks from the first/original PN to the new Phi and set
+    // their incoming values to the neutral element of the reduction.
+    for (auto *IncomingBB : PN->blocks())
+      NewPN->addIncoming(NeutralElem, IncomingBB);
+    NewPN->insertAfter(Phis.back());
+    Phis.push_back(NewPN);
+  }
+
+  // Set the chained operands of the Ops to the Phis and the incoming values of
+  // the Phis (for this BB) to the Ops.
+  for (unsigned i = 0; i < N; i++) {
+    PHINode *Phi = Phis[i];
+    Instruction *Op = Ops[i];
+
+    // Find the index of the operand of Op to replace. The first Op reads its
+    // value from the first Phi node. The other Ops read their value from the
+    // previous Op.
+    Value *OperandToReplace = i == 0 ? cast<Value>(PN) : Ops[i-1];
+    unsigned OperandIdx = Op->getOperand(0) == OperandToReplace ? 0 : 1;
+    assert(Op->getOperand(OperandIdx) == OperandToReplace &&
+           "Operand mismatch. Perhaps a malformed chain?");
+
+    // Set the operand of Op to Phi and the incoming value of Phi for BB to Op.
+    Op->setOperand(OperandIdx, Phi);
+    Phi->setIncomingValueForBlock(BB, Op);
+  }
+
+  // Replace old uses of PN and Ops outside this BB with the updated totals.
+  // The "old" total corresponding to PN now corresponds to the sum of all
+  // Phis. Similarly, the old totals in Ops correspond to the sum of the
+  // partial results in the new Ops up to the index of the Op we want to
+  // compute, plus the sum of the Phis from that index onwards.
+  //
+  // More rigorously, the totals can be computed as follows.
+  // 1. Let k be an index in the list of length N+1 below of the variables we
+  //    want to compute the new totals for:
+  //      { PN, Ops[0], Ops[1], ... }
+  // 2. Let Sum(k) denote the new total to compute for the k-th variable in the
+  //    list above. Then,
+  //      Sum(0) = Sum(PN) = \sum_{0 <= i < N} Phis[i],
+  //      Sum(1) = Sum(Ops[0]) = \sum_{0 <= i < 1} Ops[i] +
+  //                             \sum_{1 <= i < N} Phis[i],
+  //      ...
+  //      Sum(N) = Sum(Ops[N-1]) = \sum_{0 <= i < N} Ops[i].
+  // 3. More generally,
+  //      Sum(k) = Sum(PN) if k == 0 else Sum(Ops[k-1])
+  //             = \sum_{0 <= i < k} Ops[i] +
+  //               \sum_{k <= i < N} Phis[i],
+  //      for 0 <= k <= N.
+  // 4. Finally, if we name the sums in Ops and Phis separately, i.e.
+  //      SOps(k) = \sum_{0 <= i < k} Ops[i],
+  //      SPhis(k) = \sum_{k <= i < N} Phis[i],
+  //    then
+  //      Sum(k) = SOps(k) + SPhis(k), 0 <= k <= N.
+  // .
+
+  // Helper function to create a new binary op.
+  // Note: We copy the flags from Ops[0]. Could this be too permissive?
+  auto CreateBinOp = [&](Value *V1, Value *V2) {
+    auto Name = PN->getName()+".red";
+    return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0],
+                                                 Name, &BB->back());
+  };
+
+  // Compute the partial sums of the Ops:
+  //   SOps[k] = \sum_{0 <= i < k} Ops[i], 0 <= k <= N.
+  // For 1 <= k <= N we have:
+  //   SOps[k] = Ops[k-1] + \sum_{0 <= i < k-1} Ops[i]
+  //           = Ops[k-1] + SOps[k-1],
+  // so if we compute SOps in order (i.e. from 0 to N) we can reuse partial
+  // results.
+  SmallVector<Value *, 16> SOps(N+1);
+  SOps[0] = nullptr;  // alternatively we could use NeutralElem
+  SOps[1] = Ops.front();
+  for (unsigned k = 2; k <= N; k++)
+    SOps[k] = CreateBinOp(SOps[k-1], Ops[k-1]);
+
+  // Compute the partial sums of the Phis:
+  //   SPhis[k] = \sum_{k <= i < N} Phis[i], 0 <= k <= N.
+  // Similarly, for 0 <= k <= N-1 we have:
+  //   SPhis[k] = Phis[k] + \sum_{k+1 <= i < N} Phis[i]
+  //            = Phis[k] + SPhis[k+1],
+  // so if we compute SPhis in reverse (i.e. from N down to 0) we can reuse the
+  // partial sums computed thus far.
+  SmallVector<Value *, 16> SPhis(N+1);
+  SPhis[N] = nullptr;  // alternatively we could use NeutralElem
+  SPhis[N-1] = Phis.back();
+  for (signed k = N-2; k >= 0; k--)
+    SPhis[k] = CreateBinOp(SPhis[k+1], Phis[k]);
+
+  // Finally, compute the total sums for PN and Ops from:
+  //   Sums[k] = SOps[k] + SPhis[k], 0 <= k <= N.
+  // These sums might be dead, so we add them to a weak tracking vector for
+  // cleanup afterwards.
+  SmallVector<WeakTrackingVH, 16> Sums(N+1);
+  for (unsigned k = 0; k <= N; k++) {
+    // Pick the Op we want to compute the new total for.
+    Value *Op = k == 0 ? cast<Value>(PN) : Ops[k-1];
+
+    Value *SOp = SOps[k], *SPhi = SPhis[k];
+    if (SOp && SPhi)
+      Sums[k] = CreateBinOp(SOp, SPhi);
+    else if (SOp)
+      Sums[k] = SOp;
+    else
+      Sums[k] = SPhi;
+
+    // Replace uses of the old total with the new total.
+    Op->replaceUsesOutsideBlock(Sums[k], BB);
+  }
+
+  // Drop dead totals. If the totals *are* used, they could and should be
+  // sunk, but this happens in later passes, so we don't bother doing it here.
+  RecursivelyDeleteTriviallyDeadInstructionsPermissive(Sums);
+
+  return true;
+}
+
 /// Perform some cleanup and simplifications on loops after unrolling. It is
 /// useful to simplify the IV's in the new loop, as well as do a quick
 /// simplify/dce pass of the instructions.
@@ -272,6 +529,16 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
     // have a phi which (potentially indirectly) uses instructions later in
     // the block we're iterating through.
     RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+    // Try to simplify reductions (e.g. chains of floating-point adds) into
+    // independent operations (see trySimplifyReductions for details). This
+    // is a very common pattern in unrolled loops that compute dot products
+    // (for example).
+    //
+    // We do this outside the loop over the instructions above to let
+    // instsimplify kick in before trying to apply this transform.
+    if (UnrollSimplifyReductions)
+      for (PHINode &PN : BB->phis())
+        trySimplifyReductions(PN);
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/polybench-3mm.ll b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
index 034d7f44a95f41..309fa0e9a305f7 100644
--- a/llvm/test/CodeGen/AArch64/polybench-3mm.ll
+++ b/llvm/test/CodeGen/AArch64/polybench-3mm.ll
@@ -14,21 +14,23 @@ define double @test(ptr %A, ptr %B, i64 %nb) {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi d0, #0000000000000000
+; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    lsl x8, x2, #4
 ; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:  .LBB0_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add x10, x0, x9, lsl #3
-; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    ldr d2, [x1]
+; CHECK-NEXT:    ldr d5, [x1, x2, lsl #3]
 ; CHECK-NEXT:    add x9, x9, #2
-; CHECK-NEXT:    cmp x9, #1000
-; CHECK-NEXT:    ldp d2, d3, [x10]
-; CHECK-NEXT:    fmadd d0, d1, d2, d0
-; CHECK-NEXT:    ldr d1, [x1, x2, lsl #3]
 ; CHECK-NEXT:    add x1, x1, x8
-; CHECK-NEXT:    fmadd d0, d1, d3, d0
+; CHECK-NEXT:    ldp d3, d4, [x10]
+; CHECK-NEXT:    cmp x9, #1000
+; CHECK-NEXT:    fmadd d0, d2, d3, d0
+; CHECK-NEXT:    fmadd d1, d5, d4, d1
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    ret
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll b/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
index 045b1c72321a97..874a4aa800c411 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
@@ -73,6 +73,13 @@ exit:
 ; NOHWPF-LABEL: loop2:
 ; NOHWPF-NEXT: phi
 ; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
 ; NOHWPF-NEXT: getelementptr
 ; NOHWPF-NEXT: load
 ; NOHWPF-NEXT: add
@@ -106,6 +113,13 @@ exit:
 ; NOHWPF-NEXT: add
 ; NOHWPF-NEXT: add
 ; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
 ; NOHWPF-NEXT: br
 ; NOHWPF-NEXT-LABEL: exit2:
 ;
@@ -113,6 +127,9 @@ exit:
 ; CHECK-LABEL: loop2:
 ; CHECK-NEXT: phi
 ; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
 ; CHECK-NEXT: getelementptr
 ; CHECK-NEXT: load
 ; CHECK-NEXT: add
@@ -130,6 +147,9 @@ exit:
 ; CHECK-NEXT: add
 ; CHECK-NEXT: add
 ; CHECK-NEXT: icmp
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
 ; CHECK-NEXT: br
 ; CHECK-NEXT-LABEL: exit2:
 
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll b/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
index 216bf489bc66ec..59208a6da76f62 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
@@ -195,14 +195,15 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
 ; CHECK-V8-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-V8:       loop:
 ; CHECK-V8-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
-; CHECK-V8-NEXT:    [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
 ; CHECK-V8-NEXT:    [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
 ; CHECK-V8-NEXT:    [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
 ; CHECK-V8-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
 ; CHECK-V8-NEXT:    [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
-; CHECK-V8-NEXT:    [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
+; CHECK-V8-NEXT:    [[ACC_NEXT]] = add i32 [[UMAX]], [[ACC]]
 ; CHECK-V8-NEXT:    [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    store i32 [[UMAX]], ptr [[ADDR_C]], align 4
 ; CHECK-V8-NEXT:    [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -212,14 +213,15 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
 ; CHECK-V8-NEXT:    [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
 ; CHECK-V8-NEXT:    [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
 ; CHECK-V8-NEXT:    [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
-; CHECK-V8-NEXT:    [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
+; CHECK-V8-NEXT:    [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC1]]
 ; CHECK-V8-NEXT:    [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
 ; CHECK-V8-NEXT:    store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
 ; CHECK-V8-NEXT:    [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
 ; CHECK-V8-NEXT:    [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
+; CHECK-V8-NEXT:    [[ACC_RED:%.*]] = add i32 [[ACC_NEXT]], [[ACC_NEXT_1]]
 ; CHECK-V8-NEXT:    br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-V8:       exit:
-; CHECK-V8-NEXT:    [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_RED]], [[LOOP]] ]
 ; CHECK-V8-NEXT:    ret i32 [[ACC_NEXT_LCSSA]]
 ;
 entry:
@@ -251,14 +253,15 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
 ; CHECK-V8-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-V8:       loop:
 ; CHECK-V8-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
-; CHECK-V8-NEXT:    [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT:%.*]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
 ; CHECK-V8-NEXT:    [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
 ; CHECK-V8-NEXT:    [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
 ; CHECK-V8-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
 ; CHECK-V8-NEXT:    [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
-; CHECK-V8-NEXT:    [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
+; CHECK-V8-NEXT:    [[ACC_NEXT]] = add i32 [[UMAX]], [[ACC]]
 ; CHECK-V8-NEXT:    [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
 ; CHECK-V8-NEXT:    store i32 [[UMAX]], ptr [[ADDR_C]], align 4
 ; CHECK-V8-NEXT:    [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -268,14 +271,15 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
 ; CHECK-V8-NEXT:    [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
 ; CHECK-V8-NEXT:    [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
 ; CHECK-V8-NEXT:    [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
-; CHECK-V8-NEXT:    [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
+; CHECK-V8-NEXT:    [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC1]]
 ; CHECK-V8-NEXT:    [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
 ; CHECK-V8-NEXT:    store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
 ; CHECK-V8-NEXT:    [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
 ; CHECK-V8-NEXT:    [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
+; CHECK-V8-NEXT:    [[ACC_RED:%.*]] = add i32 [[ACC_NEXT]], [[ACC_NEXT_1]]
 ; CHECK-V8-NEXT:    br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-V8:       exit:
-; CHECK-V8-NEXT:    [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
+; CHECK-V8-NEXT:    [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_RED]], [[LOOP]] ]
 ; CHECK-V8-NEXT:    ret i32 [[ACC_NEXT_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
index 30389062a09678..7047e9147a57b2 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
@@ -10,523 +10,777 @@ define i32 @test(ptr %ary) "target-cpu"="znver3" {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_127:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM3:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM5:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM7:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM8:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_8:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM9:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM10:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_11:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM12:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_12:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM13:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM14:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_14:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM15:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_15:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM16:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_16:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM17:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_17:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM18:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_18:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM19:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_19:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM20:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_20:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM21:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_21:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM22:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_22:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM23:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_23:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM24:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_24:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM25:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_25:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM26:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_26:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM27:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM28:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM29:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_29:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM30:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_30:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM31:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_31:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM32:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_32:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM33:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_33:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM34:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_34:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM35:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_35:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM36:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_36:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM37:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_37:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM38:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_38:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM39:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_39:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM40:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_40:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM41:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_41:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM42:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_42:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM43:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_43:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM44:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_44:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM45:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_45:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM46:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_46:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM47:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_47:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM48:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_48:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM49:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_49:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM50:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_50:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM51:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_51:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM52:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_52:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM53:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_53:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM54:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_54:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM55:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_55:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM56:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_56:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM57:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_57:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM58:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_58:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM59:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_59:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM60:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_60:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM61:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_61:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM62:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_62:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM63:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_63:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM64:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_64:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM65:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_65:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM66:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_66:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM67:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_67:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM68:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_68:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM69:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_69:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM70:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_70:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM71:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_71:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM72:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_72:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM73:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_73:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM74:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_74:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM75:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_75:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM76:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_76:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM77:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_77:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM78:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_78:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM79:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_79:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM80:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_80:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM81:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_81:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM82:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_82:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM83:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_83:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM84:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_84:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM85:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_85:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM86:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_86:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM87:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_87:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM88:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_88:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM89:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_89:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM90:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_90:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM91:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_91:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM92:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_92:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM93:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_93:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM94:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_94:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM95:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_95:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM96:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_96:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM97:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_97:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM98:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_98:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM99:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_99:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM100:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_100:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM101:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_101:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM102:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_102:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM103:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_103:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM104:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_104:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM105:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_105:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM106:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_106:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM107:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_107:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM108:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_108:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM109:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_109:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM110:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_110:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM111:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_111:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM112:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_112:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM113:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_113:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM114:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_114:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM115:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_115:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM116:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_116:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM117:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_117:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM118:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_118:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM119:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_119:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM120:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_120:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM121:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_121:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM122:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_122:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM123:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_123:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM124:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_124:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM125:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_125:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM126:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_126:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM127:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[SUM_NEXT:%.*]] = add nsw i32 [[VAL]], [[SUM]]
+; CHECK-NEXT:    [[SUM_NEXT]] = add nsw i32 [[VAL]], [[SUM]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_1:%.*]] = add nsw i32 [[VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[SUM_NEXT_1]] = add nsw i32 [[VAL_1]], [[SUM1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_2:%.*]] = add nsw i32 [[VAL_2]], [[SUM_NEXT_1]]
+; CHECK-NEXT:    [[SUM_NEXT_2]] = add nsw i32 [[VAL_2]], [[SUM2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_3:%.*]] = add nsw i32 [[VAL_3]], [[SUM_NEXT_2]]
+; CHECK-NEXT:    [[SUM_NEXT_3]] = add nsw i32 [[VAL_3]], [[SUM3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_4:%.*]] = add nsw i32 [[VAL_4]], [[SUM_NEXT_3]]
+; CHECK-NEXT:    [[SUM_NEXT_4]] = add nsw i32 [[VAL_4]], [[SUM4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
 ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_5:%.*]] = add nsw i32 [[VAL_5]], [[SUM_NEXT_4]]
+; CHECK-NEXT:    [[SUM_NEXT_5]] = add nsw i32 [[VAL_5]], [[SUM5]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
 ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_6:%.*]] = add nsw i32 [[VAL_6]], [[SUM_NEXT_5]]
+; CHECK-NEXT:    [[SUM_NEXT_6]] = add nsw i32 [[VAL_6]], [[SUM6]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
 ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
 ; CHECK-NEXT:    [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_7:%.*]] = add nsw i32 [[VAL_7]], [[SUM_NEXT_6]]
+; CHECK-NEXT:    [[SUM_NEXT_7]] = add nsw i32 [[VAL_7]], [[SUM7]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_7]]
 ; CHECK-NEXT:    [[VAL_8:%.*]] = load i32, ptr [[ARRAYIDX_8]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_8:%.*]] = add nsw i32 [[VAL_8]], [[SUM_NEXT_7]]
+; CHECK-NEXT:    [[SUM_NEXT_8]] = add nsw i32 [[VAL_8]], [[SUM8]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 9
 ; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_8]]
 ; CHECK-NEXT:    [[VAL_9:%.*]] = load i32, ptr [[ARRAYIDX_9]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_9:%.*]] = add nsw i32 [[VAL_9]], [[SUM_NEXT_8]]
+; CHECK-NEXT:    [[SUM_NEXT_9]] = add nsw i32 [[VAL_9]], [[SUM9]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 10
 ; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_9]]
 ; CHECK-NEXT:    [[VAL_10:%.*]] = load i32, ptr [[ARRAYIDX_10]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_10:%.*]] = add nsw i32 [[VAL_10]], [[SUM_NEXT_9]]
+; CHECK-NEXT:    [[SUM_NEXT_10]] = add nsw i32 [[VAL_10]], [[SUM10]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 11
 ; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_10]]
 ; CHECK-NEXT:    [[VAL_11:%.*]] = load i32, ptr [[ARRAYIDX_11]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_11:%.*]] = add nsw i32 [[VAL_11]], [[SUM_NEXT_10]]
+; CHECK-NEXT:    [[SUM_NEXT_11]] = add nsw i32 [[VAL_11]], [[SUM11]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 12
 ; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_11]]
 ; CHECK-NEXT:    [[VAL_12:%.*]] = load i32, ptr [[ARRAYIDX_12]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_12:%.*]] = add nsw i32 [[VAL_12]], [[SUM_NEXT_11]]
+; CHECK-NEXT:    [[SUM_NEXT_12]] = add nsw i32 [[VAL_12]], [[SUM12]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 13
 ; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_12]]
 ; CHECK-NEXT:    [[VAL_13:%.*]] = load i32, ptr [[ARRAYIDX_13]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_13:%.*]] = add nsw i32 [[VAL_13]], [[SUM_NEXT_12]]
+; CHECK-NEXT:    [[SUM_NEXT_13]] = add nsw i32 [[VAL_13]], [[SUM13]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 14
 ; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_13]]
 ; CHECK-NEXT:    [[VAL_14:%.*]] = load i32, ptr [[ARRAYIDX_14]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_14:%.*]] = add nsw i32 [[VAL_14]], [[SUM_NEXT_13]]
+; CHECK-NEXT:    [[SUM_NEXT_14]] = add nsw i32 [[VAL_14]], [[SUM14]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 15
 ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_14]]
 ; CHECK-NEXT:    [[VAL_15:%.*]] = load i32, ptr [[ARRAYIDX_15]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_15:%.*]] = add nsw i32 [[VAL_15]], [[SUM_NEXT_14]]
+; CHECK-NEXT:    [[SUM_NEXT_15]] = add nsw i32 [[VAL_15]], [[SUM15]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 16
 ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_15]]
 ; CHECK-NEXT:    [[VAL_16:%.*]] = load i32, ptr [[ARRAYIDX_16]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_16:%.*]] = add nsw i32 [[VAL_16]], [[SUM_NEXT_15]]
+; CHECK-NEXT:    [[SUM_NEXT_16]] = add nsw i32 [[VAL_16]], [[SUM16]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 17
 ; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_16]]
 ; CHECK-NEXT:    [[VAL_17:%.*]] = load i32, ptr [[ARRAYIDX_17]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_17:%.*]] = add nsw i32 [[VAL_17]], [[SUM_NEXT_16]]
+; CHECK-NEXT:    [[SUM_NEXT_17]] = add nsw i32 [[VAL_17]], [[SUM17]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 18
 ; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_17]]
 ; CHECK-NEXT:    [[VAL_18:%.*]] = load i32, ptr [[ARRAYIDX_18]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_18:%.*]] = add nsw i32 [[VAL_18]], [[SUM_NEXT_17]]
+; CHECK-NEXT:    [[SUM_NEXT_18]] = add nsw i32 [[VAL_18]], [[SUM18]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_18:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 19
 ; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_18]]
 ; CHECK-NEXT:    [[VAL_19:%.*]] = load i32, ptr [[ARRAYIDX_19]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_19:%.*]] = add nsw i32 [[VAL_19]], [[SUM_NEXT_18]]
+; CHECK-NEXT:    [[SUM_NEXT_19]] = add nsw i32 [[VAL_19]], [[SUM19]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_19:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 20
 ; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_19]]
 ; CHECK-NEXT:    [[VAL_20:%.*]] = load i32, ptr [[ARRAYIDX_20]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_20:%.*]] = add nsw i32 [[VAL_20]], [[SUM_NEXT_19]]
+; CHECK-NEXT:    [[SUM_NEXT_20]] = add nsw i32 [[VAL_20]], [[SUM20]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_20:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 21
 ; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_20]]
 ; CHECK-NEXT:    [[VAL_21:%.*]] = load i32, ptr [[ARRAYIDX_21]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_21:%.*]] = add nsw i32 [[VAL_21]], [[SUM_NEXT_20]]
+; CHECK-NEXT:    [[SUM_NEXT_21]] = add nsw i32 [[VAL_21]], [[SUM21]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 22
 ; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_21]]
 ; CHECK-NEXT:    [[VAL_22:%.*]] = load i32, ptr [[ARRAYIDX_22]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_22:%.*]] = add nsw i32 [[VAL_22]], [[SUM_NEXT_21]]
+; CHECK-NEXT:    [[SUM_NEXT_22]] = add nsw i32 [[VAL_22]], [[SUM22]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 23
 ; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_22]]
 ; CHECK-NEXT:    [[VAL_23:%.*]] = load i32, ptr [[ARRAYIDX_23]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_23:%.*]] = add nsw i32 [[VAL_23]], [[SUM_NEXT_22]]
+; CHECK-NEXT:    [[SUM_NEXT_23]] = add nsw i32 [[VAL_23]], [[SUM23]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 24
 ; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_23]]
 ; CHECK-NEXT:    [[VAL_24:%.*]] = load i32, ptr [[ARRAYIDX_24]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_24:%.*]] = add nsw i32 [[VAL_24]], [[SUM_NEXT_23]]
+; CHECK-NEXT:    [[SUM_NEXT_24]] = add nsw i32 [[VAL_24]], [[SUM24]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_24:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 25
 ; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_24]]
 ; CHECK-NEXT:    [[VAL_25:%.*]] = load i32, ptr [[ARRAYIDX_25]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_25:%.*]] = add nsw i32 [[VAL_25]], [[SUM_NEXT_24]]
+; CHECK-NEXT:    [[SUM_NEXT_25]] = add nsw i32 [[VAL_25]], [[SUM25]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_25:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 26
 ; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_25]]
 ; CHECK-NEXT:    [[VAL_26:%.*]] = load i32, ptr [[ARRAYIDX_26]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_26:%.*]] = add nsw i32 [[VAL_26]], [[SUM_NEXT_25]]
+; CHECK-NEXT:    [[SUM_NEXT_26]] = add nsw i32 [[VAL_26]], [[SUM26]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_26:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 27
 ; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_26]]
 ; CHECK-NEXT:    [[VAL_27:%.*]] = load i32, ptr [[ARRAYIDX_27]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_27:%.*]] = add nsw i32 [[VAL_27]], [[SUM_NEXT_26]]
+; CHECK-NEXT:    [[SUM_NEXT_27]] = add nsw i32 [[VAL_27]], [[SUM27]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_27:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 28
 ; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_27]]
 ; CHECK-NEXT:    [[VAL_28:%.*]] = load i32, ptr [[ARRAYIDX_28]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_28:%.*]] = add nsw i32 [[VAL_28]], [[SUM_NEXT_27]]
+; CHECK-NEXT:    [[SUM_NEXT_28]] = add nsw i32 [[VAL_28]], [[SUM28]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_28:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 29
 ; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_28]]
 ; CHECK-NEXT:    [[VAL_29:%.*]] = load i32, ptr [[ARRAYIDX_29]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_29:%.*]] = add nsw i32 [[VAL_29]], [[SUM_NEXT_28]]
+; CHECK-NEXT:    [[SUM_NEXT_29]] = add nsw i32 [[VAL_29]], [[SUM29]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_29:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 30
 ; CHECK-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_29]]
 ; CHECK-NEXT:    [[VAL_30:%.*]] = load i32, ptr [[ARRAYIDX_30]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_30:%.*]] = add nsw i32 [[VAL_30]], [[SUM_NEXT_29]]
+; CHECK-NEXT:    [[SUM_NEXT_30]] = add nsw i32 [[VAL_30]], [[SUM30]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_30:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 31
 ; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_30]]
 ; CHECK-NEXT:    [[VAL_31:%.*]] = load i32, ptr [[ARRAYIDX_31]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_31:%.*]] = add nsw i32 [[VAL_31]], [[SUM_NEXT_30]]
+; CHECK-NEXT:    [[SUM_NEXT_31]] = add nsw i32 [[VAL_31]], [[SUM31]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_31:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 32
 ; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_31]]
 ; CHECK-NEXT:    [[VAL_32:%.*]] = load i32, ptr [[ARRAYIDX_32]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_32:%.*]] = add nsw i32 [[VAL_32]], [[SUM_NEXT_31]]
+; CHECK-NEXT:    [[SUM_NEXT_32]] = add nsw i32 [[VAL_32]], [[SUM32]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_32:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 33
 ; CHECK-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_32]]
 ; CHECK-NEXT:    [[VAL_33:%.*]] = load i32, ptr [[ARRAYIDX_33]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_33:%.*]] = add nsw i32 [[VAL_33]], [[SUM_NEXT_32]]
+; CHECK-NEXT:    [[SUM_NEXT_33]] = add nsw i32 [[VAL_33]], [[SUM33]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_33:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 34
 ; CHECK-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_33]]
 ; CHECK-NEXT:    [[VAL_34:%.*]] = load i32, ptr [[ARRAYIDX_34]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_34:%.*]] = add nsw i32 [[VAL_34]], [[SUM_NEXT_33]]
+; CHECK-NEXT:    [[SUM_NEXT_34]] = add nsw i32 [[VAL_34]], [[SUM34]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_34:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 35
 ; CHECK-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_34]]
 ; CHECK-NEXT:    [[VAL_35:%.*]] = load i32, ptr [[ARRAYIDX_35]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_35:%.*]] = add nsw i32 [[VAL_35]], [[SUM_NEXT_34]]
+; CHECK-NEXT:    [[SUM_NEXT_35]] = add nsw i32 [[VAL_35]], [[SUM35]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_35:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 36
 ; CHECK-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_35]]
 ; CHECK-NEXT:    [[VAL_36:%.*]] = load i32, ptr [[ARRAYIDX_36]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_36:%.*]] = add nsw i32 [[VAL_36]], [[SUM_NEXT_35]]
+; CHECK-NEXT:    [[SUM_NEXT_36]] = add nsw i32 [[VAL_36]], [[SUM36]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_36:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 37
 ; CHECK-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_36]]
 ; CHECK-NEXT:    [[VAL_37:%.*]] = load i32, ptr [[ARRAYIDX_37]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_37:%.*]] = add nsw i32 [[VAL_37]], [[SUM_NEXT_36]]
+; CHECK-NEXT:    [[SUM_NEXT_37]] = add nsw i32 [[VAL_37]], [[SUM37]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_37:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 38
 ; CHECK-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_37]]
 ; CHECK-NEXT:    [[VAL_38:%.*]] = load i32, ptr [[ARRAYIDX_38]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_38:%.*]] = add nsw i32 [[VAL_38]], [[SUM_NEXT_37]]
+; CHECK-NEXT:    [[SUM_NEXT_38]] = add nsw i32 [[VAL_38]], [[SUM38]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_38:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 39
 ; CHECK-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_38]]
 ; CHECK-NEXT:    [[VAL_39:%.*]] = load i32, ptr [[ARRAYIDX_39]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_39:%.*]] = add nsw i32 [[VAL_39]], [[SUM_NEXT_38]]
+; CHECK-NEXT:    [[SUM_NEXT_39]] = add nsw i32 [[VAL_39]], [[SUM39]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_39:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 40
 ; CHECK-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_39]]
 ; CHECK-NEXT:    [[VAL_40:%.*]] = load i32, ptr [[ARRAYIDX_40]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_40:%.*]] = add nsw i32 [[VAL_40]], [[SUM_NEXT_39]]
+; CHECK-NEXT:    [[SUM_NEXT_40]] = add nsw i32 [[VAL_40]], [[SUM40]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_40:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 41
 ; CHECK-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_40]]
 ; CHECK-NEXT:    [[VAL_41:%.*]] = load i32, ptr [[ARRAYIDX_41]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_41:%.*]] = add nsw i32 [[VAL_41]], [[SUM_NEXT_40]]
+; CHECK-NEXT:    [[SUM_NEXT_41]] = add nsw i32 [[VAL_41]], [[SUM41]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_41:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 42
 ; CHECK-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_41]]
 ; CHECK-NEXT:    [[VAL_42:%.*]] = load i32, ptr [[ARRAYIDX_42]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_42:%.*]] = add nsw i32 [[VAL_42]], [[SUM_NEXT_41]]
+; CHECK-NEXT:    [[SUM_NEXT_42]] = add nsw i32 [[VAL_42]], [[SUM42]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_42:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 43
 ; CHECK-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_42]]
 ; CHECK-NEXT:    [[VAL_43:%.*]] = load i32, ptr [[ARRAYIDX_43]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_43:%.*]] = add nsw i32 [[VAL_43]], [[SUM_NEXT_42]]
+; CHECK-NEXT:    [[SUM_NEXT_43]] = add nsw i32 [[VAL_43]], [[SUM43]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_43:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 44
 ; CHECK-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_43]]
 ; CHECK-NEXT:    [[VAL_44:%.*]] = load i32, ptr [[ARRAYIDX_44]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_44:%.*]] = add nsw i32 [[VAL_44]], [[SUM_NEXT_43]]
+; CHECK-NEXT:    [[SUM_NEXT_44]] = add nsw i32 [[VAL_44]], [[SUM44]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_44:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 45
 ; CHECK-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_44]]
 ; CHECK-NEXT:    [[VAL_45:%.*]] = load i32, ptr [[ARRAYIDX_45]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_45:%.*]] = add nsw i32 [[VAL_45]], [[SUM_NEXT_44]]
+; CHECK-NEXT:    [[SUM_NEXT_45]] = add nsw i32 [[VAL_45]], [[SUM45]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_45:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 46
 ; CHECK-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_45]]
 ; CHECK-NEXT:    [[VAL_46:%.*]] = load i32, ptr [[ARRAYIDX_46]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_46:%.*]] = add nsw i32 [[VAL_46]], [[SUM_NEXT_45]]
+; CHECK-NEXT:    [[SUM_NEXT_46]] = add nsw i32 [[VAL_46]], [[SUM46]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_46:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 47
 ; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_46]]
 ; CHECK-NEXT:    [[VAL_47:%.*]] = load i32, ptr [[ARRAYIDX_47]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_47:%.*]] = add nsw i32 [[VAL_47]], [[SUM_NEXT_46]]
+; CHECK-NEXT:    [[SUM_NEXT_47]] = add nsw i32 [[VAL_47]], [[SUM47]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_47:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 48
 ; CHECK-NEXT:    [[ARRAYIDX_48:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_47]]
 ; CHECK-NEXT:    [[VAL_48:%.*]] = load i32, ptr [[ARRAYIDX_48]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_48:%.*]] = add nsw i32 [[VAL_48]], [[SUM_NEXT_47]]
+; CHECK-NEXT:    [[SUM_NEXT_48]] = add nsw i32 [[VAL_48]], [[SUM48]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_48:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 49
 ; CHECK-NEXT:    [[ARRAYIDX_49:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_48]]
 ; CHECK-NEXT:    [[VAL_49:%.*]] = load i32, ptr [[ARRAYIDX_49]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_49:%.*]] = add nsw i32 [[VAL_49]], [[SUM_NEXT_48]]
+; CHECK-NEXT:    [[SUM_NEXT_49]] = add nsw i32 [[VAL_49]], [[SUM49]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_49:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 50
 ; CHECK-NEXT:    [[ARRAYIDX_50:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_49]]
 ; CHECK-NEXT:    [[VAL_50:%.*]] = load i32, ptr [[ARRAYIDX_50]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_50:%.*]] = add nsw i32 [[VAL_50]], [[SUM_NEXT_49]]
+; CHECK-NEXT:    [[SUM_NEXT_50]] = add nsw i32 [[VAL_50]], [[SUM50]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_50:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 51
 ; CHECK-NEXT:    [[ARRAYIDX_51:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_50]]
 ; CHECK-NEXT:    [[VAL_51:%.*]] = load i32, ptr [[ARRAYIDX_51]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_51:%.*]] = add nsw i32 [[VAL_51]], [[SUM_NEXT_50]]
+; CHECK-NEXT:    [[SUM_NEXT_51]] = add nsw i32 [[VAL_51]], [[SUM51]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_51:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 52
 ; CHECK-NEXT:    [[ARRAYIDX_52:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_51]]
 ; CHECK-NEXT:    [[VAL_52:%.*]] = load i32, ptr [[ARRAYIDX_52]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_52:%.*]] = add nsw i32 [[VAL_52]], [[SUM_NEXT_51]]
+; CHECK-NEXT:    [[SUM_NEXT_52]] = add nsw i32 [[VAL_52]], [[SUM52]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_52:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 53
 ; CHECK-NEXT:    [[ARRAYIDX_53:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_52]]
 ; CHECK-NEXT:    [[VAL_53:%.*]] = load i32, ptr [[ARRAYIDX_53]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_53:%.*]] = add nsw i32 [[VAL_53]], [[SUM_NEXT_52]]
+; CHECK-NEXT:    [[SUM_NEXT_53]] = add nsw i32 [[VAL_53]], [[SUM53]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_53:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 54
 ; CHECK-NEXT:    [[ARRAYIDX_54:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_53]]
 ; CHECK-NEXT:    [[VAL_54:%.*]] = load i32, ptr [[ARRAYIDX_54]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_54:%.*]] = add nsw i32 [[VAL_54]], [[SUM_NEXT_53]]
+; CHECK-NEXT:    [[SUM_NEXT_54]] = add nsw i32 [[VAL_54]], [[SUM54]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_54:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 55
 ; CHECK-NEXT:    [[ARRAYIDX_55:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_54]]
 ; CHECK-NEXT:    [[VAL_55:%.*]] = load i32, ptr [[ARRAYIDX_55]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_55:%.*]] = add nsw i32 [[VAL_55]], [[SUM_NEXT_54]]
+; CHECK-NEXT:    [[SUM_NEXT_55]] = add nsw i32 [[VAL_55]], [[SUM55]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_55:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 56
 ; CHECK-NEXT:    [[ARRAYIDX_56:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_55]]
 ; CHECK-NEXT:    [[VAL_56:%.*]] = load i32, ptr [[ARRAYIDX_56]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_56:%.*]] = add nsw i32 [[VAL_56]], [[SUM_NEXT_55]]
+; CHECK-NEXT:    [[SUM_NEXT_56]] = add nsw i32 [[VAL_56]], [[SUM56]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_56:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 57
 ; CHECK-NEXT:    [[ARRAYIDX_57:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_56]]
 ; CHECK-NEXT:    [[VAL_57:%.*]] = load i32, ptr [[ARRAYIDX_57]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_57:%.*]] = add nsw i32 [[VAL_57]], [[SUM_NEXT_56]]
+; CHECK-NEXT:    [[SUM_NEXT_57]] = add nsw i32 [[VAL_57]], [[SUM57]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_57:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 58
 ; CHECK-NEXT:    [[ARRAYIDX_58:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_57]]
 ; CHECK-NEXT:    [[VAL_58:%.*]] = load i32, ptr [[ARRAYIDX_58]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_58:%.*]] = add nsw i32 [[VAL_58]], [[SUM_NEXT_57]]
+; CHECK-NEXT:    [[SUM_NEXT_58]] = add nsw i32 [[VAL_58]], [[SUM58]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_58:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 59
 ; CHECK-NEXT:    [[ARRAYIDX_59:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_58]]
 ; CHECK-NEXT:    [[VAL_59:%.*]] = load i32, ptr [[ARRAYIDX_59]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_59:%.*]] = add nsw i32 [[VAL_59]], [[SUM_NEXT_58]]
+; CHECK-NEXT:    [[SUM_NEXT_59]] = add nsw i32 [[VAL_59]], [[SUM59]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_59:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 60
 ; CHECK-NEXT:    [[ARRAYIDX_60:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_59]]
 ; CHECK-NEXT:    [[VAL_60:%.*]] = load i32, ptr [[ARRAYIDX_60]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_60:%.*]] = add nsw i32 [[VAL_60]], [[SUM_NEXT_59]]
+; CHECK-NEXT:    [[SUM_NEXT_60]] = add nsw i32 [[VAL_60]], [[SUM60]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_60:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 61
 ; CHECK-NEXT:    [[ARRAYIDX_61:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_60]]
 ; CHECK-NEXT:    [[VAL_61:%.*]] = load i32, ptr [[ARRAYIDX_61]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_61:%.*]] = add nsw i32 [[VAL_61]], [[SUM_NEXT_60]]
+; CHECK-NEXT:    [[SUM_NEXT_61]] = add nsw i32 [[VAL_61]], [[SUM61]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_61:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 62
 ; CHECK-NEXT:    [[ARRAYIDX_62:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_61]]
 ; CHECK-NEXT:    [[VAL_62:%.*]] = load i32, ptr [[ARRAYIDX_62]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_62:%.*]] = add nsw i32 [[VAL_62]], [[SUM_NEXT_61]]
+; CHECK-NEXT:    [[SUM_NEXT_62]] = add nsw i32 [[VAL_62]], [[SUM62]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_62:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 63
 ; CHECK-NEXT:    [[ARRAYIDX_63:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_62]]
 ; CHECK-NEXT:    [[VAL_63:%.*]] = load i32, ptr [[ARRAYIDX_63]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_63:%.*]] = add nsw i32 [[VAL_63]], [[SUM_NEXT_62]]
+; CHECK-NEXT:    [[SUM_NEXT_63]] = add nsw i32 [[VAL_63]], [[SUM63]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_63:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 64
 ; CHECK-NEXT:    [[ARRAYIDX_64:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_63]]
 ; CHECK-NEXT:    [[VAL_64:%.*]] = load i32, ptr [[ARRAYIDX_64]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_64:%.*]] = add nsw i32 [[VAL_64]], [[SUM_NEXT_63]]
+; CHECK-NEXT:    [[SUM_NEXT_64]] = add nsw i32 [[VAL_64]], [[SUM64]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_64:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 65
 ; CHECK-NEXT:    [[ARRAYIDX_65:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_64]]
 ; CHECK-NEXT:    [[VAL_65:%.*]] = load i32, ptr [[ARRAYIDX_65]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_65:%.*]] = add nsw i32 [[VAL_65]], [[SUM_NEXT_64]]
+; CHECK-NEXT:    [[SUM_NEXT_65]] = add nsw i32 [[VAL_65]], [[SUM65]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_65:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 66
 ; CHECK-NEXT:    [[ARRAYIDX_66:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_65]]
 ; CHECK-NEXT:    [[VAL_66:%.*]] = load i32, ptr [[ARRAYIDX_66]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_66:%.*]] = add nsw i32 [[VAL_66]], [[SUM_NEXT_65]]
+; CHECK-NEXT:    [[SUM_NEXT_66]] = add nsw i32 [[VAL_66]], [[SUM66]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_66:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 67
 ; CHECK-NEXT:    [[ARRAYIDX_67:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_66]]
 ; CHECK-NEXT:    [[VAL_67:%.*]] = load i32, ptr [[ARRAYIDX_67]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_67:%.*]] = add nsw i32 [[VAL_67]], [[SUM_NEXT_66]]
+; CHECK-NEXT:    [[SUM_NEXT_67]] = add nsw i32 [[VAL_67]], [[SUM67]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_67:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 68
 ; CHECK-NEXT:    [[ARRAYIDX_68:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_67]]
 ; CHECK-NEXT:    [[VAL_68:%.*]] = load i32, ptr [[ARRAYIDX_68]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_68:%.*]] = add nsw i32 [[VAL_68]], [[SUM_NEXT_67]]
+; CHECK-NEXT:    [[SUM_NEXT_68]] = add nsw i32 [[VAL_68]], [[SUM68]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_68:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 69
 ; CHECK-NEXT:    [[ARRAYIDX_69:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_68]]
 ; CHECK-NEXT:    [[VAL_69:%.*]] = load i32, ptr [[ARRAYIDX_69]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_69:%.*]] = add nsw i32 [[VAL_69]], [[SUM_NEXT_68]]
+; CHECK-NEXT:    [[SUM_NEXT_69]] = add nsw i32 [[VAL_69]], [[SUM69]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_69:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 70
 ; CHECK-NEXT:    [[ARRAYIDX_70:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_69]]
 ; CHECK-NEXT:    [[VAL_70:%.*]] = load i32, ptr [[ARRAYIDX_70]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_70:%.*]] = add nsw i32 [[VAL_70]], [[SUM_NEXT_69]]
+; CHECK-NEXT:    [[SUM_NEXT_70]] = add nsw i32 [[VAL_70]], [[SUM70]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_70:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 71
 ; CHECK-NEXT:    [[ARRAYIDX_71:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_70]]
 ; CHECK-NEXT:    [[VAL_71:%.*]] = load i32, ptr [[ARRAYIDX_71]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_71:%.*]] = add nsw i32 [[VAL_71]], [[SUM_NEXT_70]]
+; CHECK-NEXT:    [[SUM_NEXT_71]] = add nsw i32 [[VAL_71]], [[SUM71]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_71:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 72
 ; CHECK-NEXT:    [[ARRAYIDX_72:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_71]]
 ; CHECK-NEXT:    [[VAL_72:%.*]] = load i32, ptr [[ARRAYIDX_72]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_72:%.*]] = add nsw i32 [[VAL_72]], [[SUM_NEXT_71]]
+; CHECK-NEXT:    [[SUM_NEXT_72]] = add nsw i32 [[VAL_72]], [[SUM72]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_72:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 73
 ; CHECK-NEXT:    [[ARRAYIDX_73:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_72]]
 ; CHECK-NEXT:    [[VAL_73:%.*]] = load i32, ptr [[ARRAYIDX_73]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_73:%.*]] = add nsw i32 [[VAL_73]], [[SUM_NEXT_72]]
+; CHECK-NEXT:    [[SUM_NEXT_73]] = add nsw i32 [[VAL_73]], [[SUM73]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_73:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 74
 ; CHECK-NEXT:    [[ARRAYIDX_74:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_73]]
 ; CHECK-NEXT:    [[VAL_74:%.*]] = load i32, ptr [[ARRAYIDX_74]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_74:%.*]] = add nsw i32 [[VAL_74]], [[SUM_NEXT_73]]
+; CHECK-NEXT:    [[SUM_NEXT_74]] = add nsw i32 [[VAL_74]], [[SUM74]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_74:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 75
 ; CHECK-NEXT:    [[ARRAYIDX_75:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_74]]
 ; CHECK-NEXT:    [[VAL_75:%.*]] = load i32, ptr [[ARRAYIDX_75]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_75:%.*]] = add nsw i32 [[VAL_75]], [[SUM_NEXT_74]]
+; CHECK-NEXT:    [[SUM_NEXT_75]] = add nsw i32 [[VAL_75]], [[SUM75]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_75:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 76
 ; CHECK-NEXT:    [[ARRAYIDX_76:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_75]]
 ; CHECK-NEXT:    [[VAL_76:%.*]] = load i32, ptr [[ARRAYIDX_76]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_76:%.*]] = add nsw i32 [[VAL_76]], [[SUM_NEXT_75]]
+; CHECK-NEXT:    [[SUM_NEXT_76]] = add nsw i32 [[VAL_76]], [[SUM76]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_76:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 77
 ; CHECK-NEXT:    [[ARRAYIDX_77:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_76]]
 ; CHECK-NEXT:    [[VAL_77:%.*]] = load i32, ptr [[ARRAYIDX_77]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_77:%.*]] = add nsw i32 [[VAL_77]], [[SUM_NEXT_76]]
+; CHECK-NEXT:    [[SUM_NEXT_77]] = add nsw i32 [[VAL_77]], [[SUM77]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_77:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 78
 ; CHECK-NEXT:    [[ARRAYIDX_78:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_77]]
 ; CHECK-NEXT:    [[VAL_78:%.*]] = load i32, ptr [[ARRAYIDX_78]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_78:%.*]] = add nsw i32 [[VAL_78]], [[SUM_NEXT_77]]
+; CHECK-NEXT:    [[SUM_NEXT_78]] = add nsw i32 [[VAL_78]], [[SUM78]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_78:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 79
 ; CHECK-NEXT:    [[ARRAYIDX_79:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_78]]
 ; CHECK-NEXT:    [[VAL_79:%.*]] = load i32, ptr [[ARRAYIDX_79]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_79:%.*]] = add nsw i32 [[VAL_79]], [[SUM_NEXT_78]]
+; CHECK-NEXT:    [[SUM_NEXT_79]] = add nsw i32 [[VAL_79]], [[SUM79]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_79:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 80
 ; CHECK-NEXT:    [[ARRAYIDX_80:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_79]]
 ; CHECK-NEXT:    [[VAL_80:%.*]] = load i32, ptr [[ARRAYIDX_80]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_80:%.*]] = add nsw i32 [[VAL_80]], [[SUM_NEXT_79]]
+; CHECK-NEXT:    [[SUM_NEXT_80]] = add nsw i32 [[VAL_80]], [[SUM80]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_80:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 81
 ; CHECK-NEXT:    [[ARRAYIDX_81:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_80]]
 ; CHECK-NEXT:    [[VAL_81:%.*]] = load i32, ptr [[ARRAYIDX_81]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_81:%.*]] = add nsw i32 [[VAL_81]], [[SUM_NEXT_80]]
+; CHECK-NEXT:    [[SUM_NEXT_81]] = add nsw i32 [[VAL_81]], [[SUM81]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_81:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 82
 ; CHECK-NEXT:    [[ARRAYIDX_82:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_81]]
 ; CHECK-NEXT:    [[VAL_82:%.*]] = load i32, ptr [[ARRAYIDX_82]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_82:%.*]] = add nsw i32 [[VAL_82]], [[SUM_NEXT_81]]
+; CHECK-NEXT:    [[SUM_NEXT_82]] = add nsw i32 [[VAL_82]], [[SUM82]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_82:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 83
 ; CHECK-NEXT:    [[ARRAYIDX_83:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_82]]
 ; CHECK-NEXT:    [[VAL_83:%.*]] = load i32, ptr [[ARRAYIDX_83]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_83:%.*]] = add nsw i32 [[VAL_83]], [[SUM_NEXT_82]]
+; CHECK-NEXT:    [[SUM_NEXT_83]] = add nsw i32 [[VAL_83]], [[SUM83]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_83:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 84
 ; CHECK-NEXT:    [[ARRAYIDX_84:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_83]]
 ; CHECK-NEXT:    [[VAL_84:%.*]] = load i32, ptr [[ARRAYIDX_84]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_84:%.*]] = add nsw i32 [[VAL_84]], [[SUM_NEXT_83]]
+; CHECK-NEXT:    [[SUM_NEXT_84]] = add nsw i32 [[VAL_84]], [[SUM84]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_84:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 85
 ; CHECK-NEXT:    [[ARRAYIDX_85:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_84]]
 ; CHECK-NEXT:    [[VAL_85:%.*]] = load i32, ptr [[ARRAYIDX_85]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_85:%.*]] = add nsw i32 [[VAL_85]], [[SUM_NEXT_84]]
+; CHECK-NEXT:    [[SUM_NEXT_85]] = add nsw i32 [[VAL_85]], [[SUM85]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_85:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 86
 ; CHECK-NEXT:    [[ARRAYIDX_86:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_85]]
 ; CHECK-NEXT:    [[VAL_86:%.*]] = load i32, ptr [[ARRAYIDX_86]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_86:%.*]] = add nsw i32 [[VAL_86]], [[SUM_NEXT_85]]
+; CHECK-NEXT:    [[SUM_NEXT_86]] = add nsw i32 [[VAL_86]], [[SUM86]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_86:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 87
 ; CHECK-NEXT:    [[ARRAYIDX_87:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_86]]
 ; CHECK-NEXT:    [[VAL_87:%.*]] = load i32, ptr [[ARRAYIDX_87]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_87:%.*]] = add nsw i32 [[VAL_87]], [[SUM_NEXT_86]]
+; CHECK-NEXT:    [[SUM_NEXT_87]] = add nsw i32 [[VAL_87]], [[SUM87]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_87:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 88
 ; CHECK-NEXT:    [[ARRAYIDX_88:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_87]]
 ; CHECK-NEXT:    [[VAL_88:%.*]] = load i32, ptr [[ARRAYIDX_88]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_88:%.*]] = add nsw i32 [[VAL_88]], [[SUM_NEXT_87]]
+; CHECK-NEXT:    [[SUM_NEXT_88]] = add nsw i32 [[VAL_88]], [[SUM88]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_88:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 89
 ; CHECK-NEXT:    [[ARRAYIDX_89:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_88]]
 ; CHECK-NEXT:    [[VAL_89:%.*]] = load i32, ptr [[ARRAYIDX_89]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_89:%.*]] = add nsw i32 [[VAL_89]], [[SUM_NEXT_88]]
+; CHECK-NEXT:    [[SUM_NEXT_89]] = add nsw i32 [[VAL_89]], [[SUM89]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_89:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 90
 ; CHECK-NEXT:    [[ARRAYIDX_90:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_89]]
 ; CHECK-NEXT:    [[VAL_90:%.*]] = load i32, ptr [[ARRAYIDX_90]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_90:%.*]] = add nsw i32 [[VAL_90]], [[SUM_NEXT_89]]
+; CHECK-NEXT:    [[SUM_NEXT_90]] = add nsw i32 [[VAL_90]], [[SUM90]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_90:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 91
 ; CHECK-NEXT:    [[ARRAYIDX_91:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_90]]
 ; CHECK-NEXT:    [[VAL_91:%.*]] = load i32, ptr [[ARRAYIDX_91]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_91:%.*]] = add nsw i32 [[VAL_91]], [[SUM_NEXT_90]]
+; CHECK-NEXT:    [[SUM_NEXT_91]] = add nsw i32 [[VAL_91]], [[SUM91]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_91:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 92
 ; CHECK-NEXT:    [[ARRAYIDX_92:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_91]]
 ; CHECK-NEXT:    [[VAL_92:%.*]] = load i32, ptr [[ARRAYIDX_92]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_92:%.*]] = add nsw i32 [[VAL_92]], [[SUM_NEXT_91]]
+; CHECK-NEXT:    [[SUM_NEXT_92]] = add nsw i32 [[VAL_92]], [[SUM92]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_92:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 93
 ; CHECK-NEXT:    [[ARRAYIDX_93:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_92]]
 ; CHECK-NEXT:    [[VAL_93:%.*]] = load i32, ptr [[ARRAYIDX_93]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_93:%.*]] = add nsw i32 [[VAL_93]], [[SUM_NEXT_92]]
+; CHECK-NEXT:    [[SUM_NEXT_93]] = add nsw i32 [[VAL_93]], [[SUM93]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_93:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 94
 ; CHECK-NEXT:    [[ARRAYIDX_94:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_93]]
 ; CHECK-NEXT:    [[VAL_94:%.*]] = load i32, ptr [[ARRAYIDX_94]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_94:%.*]] = add nsw i32 [[VAL_94]], [[SUM_NEXT_93]]
+; CHECK-NEXT:    [[SUM_NEXT_94]] = add nsw i32 [[VAL_94]], [[SUM94]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_94:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 95
 ; CHECK-NEXT:    [[ARRAYIDX_95:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_94]]
 ; CHECK-NEXT:    [[VAL_95:%.*]] = load i32, ptr [[ARRAYIDX_95]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_95:%.*]] = add nsw i32 [[VAL_95]], [[SUM_NEXT_94]]
+; CHECK-NEXT:    [[SUM_NEXT_95]] = add nsw i32 [[VAL_95]], [[SUM95]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_95:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 96
 ; CHECK-NEXT:    [[ARRAYIDX_96:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_95]]
 ; CHECK-NEXT:    [[VAL_96:%.*]] = load i32, ptr [[ARRAYIDX_96]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_96:%.*]] = add nsw i32 [[VAL_96]], [[SUM_NEXT_95]]
+; CHECK-NEXT:    [[SUM_NEXT_96]] = add nsw i32 [[VAL_96]], [[SUM96]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_96:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 97
 ; CHECK-NEXT:    [[ARRAYIDX_97:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_96]]
 ; CHECK-NEXT:    [[VAL_97:%.*]] = load i32, ptr [[ARRAYIDX_97]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_97:%.*]] = add nsw i32 [[VAL_97]], [[SUM_NEXT_96]]
+; CHECK-NEXT:    [[SUM_NEXT_97]] = add nsw i32 [[VAL_97]], [[SUM97]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_97:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 98
 ; CHECK-NEXT:    [[ARRAYIDX_98:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_97]]
 ; CHECK-NEXT:    [[VAL_98:%.*]] = load i32, ptr [[ARRAYIDX_98]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_98:%.*]] = add nsw i32 [[VAL_98]], [[SUM_NEXT_97]]
+; CHECK-NEXT:    [[SUM_NEXT_98]] = add nsw i32 [[VAL_98]], [[SUM98]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_98:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 99
 ; CHECK-NEXT:    [[ARRAYIDX_99:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_98]]
 ; CHECK-NEXT:    [[VAL_99:%.*]] = load i32, ptr [[ARRAYIDX_99]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_99:%.*]] = add nsw i32 [[VAL_99]], [[SUM_NEXT_98]]
+; CHECK-NEXT:    [[SUM_NEXT_99]] = add nsw i32 [[VAL_99]], [[SUM99]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_99:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 100
 ; CHECK-NEXT:    [[ARRAYIDX_100:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_99]]
 ; CHECK-NEXT:    [[VAL_100:%.*]] = load i32, ptr [[ARRAYIDX_100]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_100:%.*]] = add nsw i32 [[VAL_100]], [[SUM_NEXT_99]]
+; CHECK-NEXT:    [[SUM_NEXT_100]] = add nsw i32 [[VAL_100]], [[SUM100]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_100:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 101
 ; CHECK-NEXT:    [[ARRAYIDX_101:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_100]]
 ; CHECK-NEXT:    [[VAL_101:%.*]] = load i32, ptr [[ARRAYIDX_101]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_101:%.*]] = add nsw i32 [[VAL_101]], [[SUM_NEXT_100]]
+; CHECK-NEXT:    [[SUM_NEXT_101]] = add nsw i32 [[VAL_101]], [[SUM101]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_101:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 102
 ; CHECK-NEXT:    [[ARRAYIDX_102:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_101]]
 ; CHECK-NEXT:    [[VAL_102:%.*]] = load i32, ptr [[ARRAYIDX_102]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_102:%.*]] = add nsw i32 [[VAL_102]], [[SUM_NEXT_101]]
+; CHECK-NEXT:    [[SUM_NEXT_102]] = add nsw i32 [[VAL_102]], [[SUM102]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_102:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 103
 ; CHECK-NEXT:    [[ARRAYIDX_103:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_102]]
 ; CHECK-NEXT:    [[VAL_103:%.*]] = load i32, ptr [[ARRAYIDX_103]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_103:%.*]] = add nsw i32 [[VAL_103]], [[SUM_NEXT_102]]
+; CHECK-NEXT:    [[SUM_NEXT_103]] = add nsw i32 [[VAL_103]], [[SUM103]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_103:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 104
 ; CHECK-NEXT:    [[ARRAYIDX_104:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_103]]
 ; CHECK-NEXT:    [[VAL_104:%.*]] = load i32, ptr [[ARRAYIDX_104]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_104:%.*]] = add nsw i32 [[VAL_104]], [[SUM_NEXT_103]]
+; CHECK-NEXT:    [[SUM_NEXT_104]] = add nsw i32 [[VAL_104]], [[SUM104]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_104:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 105
 ; CHECK-NEXT:    [[ARRAYIDX_105:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_104]]
 ; CHECK-NEXT:    [[VAL_105:%.*]] = load i32, ptr [[ARRAYIDX_105]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_105:%.*]] = add nsw i32 [[VAL_105]], [[SUM_NEXT_104]]
+; CHECK-NEXT:    [[SUM_NEXT_105]] = add nsw i32 [[VAL_105]], [[SUM105]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_105:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 106
 ; CHECK-NEXT:    [[ARRAYIDX_106:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_105]]
 ; CHECK-NEXT:    [[VAL_106:%.*]] = load i32, ptr [[ARRAYIDX_106]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_106:%.*]] = add nsw i32 [[VAL_106]], [[SUM_NEXT_105]]
+; CHECK-NEXT:    [[SUM_NEXT_106]] = add nsw i32 [[VAL_106]], [[SUM106]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_106:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 107
 ; CHECK-NEXT:    [[ARRAYIDX_107:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_106]]
 ; CHECK-NEXT:    [[VAL_107:%.*]] = load i32, ptr [[ARRAYIDX_107]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_107:%.*]] = add nsw i32 [[VAL_107]], [[SUM_NEXT_106]]
+; CHECK-NEXT:    [[SUM_NEXT_107]] = add nsw i32 [[VAL_107]], [[SUM107]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_107:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 108
 ; CHECK-NEXT:    [[ARRAYIDX_108:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_107]]
 ; CHECK-NEXT:    [[VAL_108:%.*]] = load i32, ptr [[ARRAYIDX_108]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_108:%.*]] = add nsw i32 [[VAL_108]], [[SUM_NEXT_107]]
+; CHECK-NEXT:    [[SUM_NEXT_108]] = add nsw i32 [[VAL_108]], [[SUM108]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_108:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 109
 ; CHECK-NEXT:    [[ARRAYIDX_109:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_108]]
 ; CHECK-NEXT:    [[VAL_109:%.*]] = load i32, ptr [[ARRAYIDX_109]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_109:%.*]] = add nsw i32 [[VAL_109]], [[SUM_NEXT_108]]
+; CHECK-NEXT:    [[SUM_NEXT_109]] = add nsw i32 [[VAL_109]], [[SUM109]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_109:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 110
 ; CHECK-NEXT:    [[ARRAYIDX_110:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_109]]
 ; CHECK-NEXT:    [[VAL_110:%.*]] = load i32, ptr [[ARRAYIDX_110]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_110:%.*]] = add nsw i32 [[VAL_110]], [[SUM_NEXT_109]]
+; CHECK-NEXT:    [[SUM_NEXT_110]] = add nsw i32 [[VAL_110]], [[SUM110]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_110:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 111
 ; CHECK-NEXT:    [[ARRAYIDX_111:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_110]]
 ; CHECK-NEXT:    [[VAL_111:%.*]] = load i32, ptr [[ARRAYIDX_111]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_111:%.*]] = add nsw i32 [[VAL_111]], [[SUM_NEXT_110]]
+; CHECK-NEXT:    [[SUM_NEXT_111]] = add nsw i32 [[VAL_111]], [[SUM111]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_111:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 112
 ; CHECK-NEXT:    [[ARRAYIDX_112:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_111]]
 ; CHECK-NEXT:    [[VAL_112:%.*]] = load i32, ptr [[ARRAYIDX_112]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_112:%.*]] = add nsw i32 [[VAL_112]], [[SUM_NEXT_111]]
+; CHECK-NEXT:    [[SUM_NEXT_112]] = add nsw i32 [[VAL_112]], [[SUM112]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_112:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 113
 ; CHECK-NEXT:    [[ARRAYIDX_113:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_112]]
 ; CHECK-NEXT:    [[VAL_113:%.*]] = load i32, ptr [[ARRAYIDX_113]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_113:%.*]] = add nsw i32 [[VAL_113]], [[SUM_NEXT_112]]
+; CHECK-NEXT:    [[SUM_NEXT_113]] = add nsw i32 [[VAL_113]], [[SUM113]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_113:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 114
 ; CHECK-NEXT:    [[ARRAYIDX_114:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_113]]
 ; CHECK-NEXT:    [[VAL_114:%.*]] = load i32, ptr [[ARRAYIDX_114]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_114:%.*]] = add nsw i32 [[VAL_114]], [[SUM_NEXT_113]]
+; CHECK-NEXT:    [[SUM_NEXT_114]] = add nsw i32 [[VAL_114]], [[SUM114]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_114:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 115
 ; CHECK-NEXT:    [[ARRAYIDX_115:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_114]]
 ; CHECK-NEXT:    [[VAL_115:%.*]] = load i32, ptr [[ARRAYIDX_115]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_115:%.*]] = add nsw i32 [[VAL_115]], [[SUM_NEXT_114]]
+; CHECK-NEXT:    [[SUM_NEXT_115]] = add nsw i32 [[VAL_115]], [[SUM115]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_115:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 116
 ; CHECK-NEXT:    [[ARRAYIDX_116:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_115]]
 ; CHECK-NEXT:    [[VAL_116:%.*]] = load i32, ptr [[ARRAYIDX_116]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_116:%.*]] = add nsw i32 [[VAL_116]], [[SUM_NEXT_115]]
+; CHECK-NEXT:    [[SUM_NEXT_116]] = add nsw i32 [[VAL_116]], [[SUM116]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_116:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 117
 ; CHECK-NEXT:    [[ARRAYIDX_117:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_116]]
 ; CHECK-NEXT:    [[VAL_117:%.*]] = load i32, ptr [[ARRAYIDX_117]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_117:%.*]] = add nsw i32 [[VAL_117]], [[SUM_NEXT_116]]
+; CHECK-NEXT:    [[SUM_NEXT_117]] = add nsw i32 [[VAL_117]], [[SUM117]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_117:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 118
 ; CHECK-NEXT:    [[ARRAYIDX_118:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_117]]
 ; CHECK-NEXT:    [[VAL_118:%.*]] = load i32, ptr [[ARRAYIDX_118]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_118:%.*]] = add nsw i32 [[VAL_118]], [[SUM_NEXT_117]]
+; CHECK-NEXT:    [[SUM_NEXT_118]] = add nsw i32 [[VAL_118]], [[SUM118]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_118:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 119
 ; CHECK-NEXT:    [[ARRAYIDX_119:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_118]]
 ; CHECK-NEXT:    [[VAL_119:%.*]] = load i32, ptr [[ARRAYIDX_119]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_119:%.*]] = add nsw i32 [[VAL_119]], [[SUM_NEXT_118]]
+; CHECK-NEXT:    [[SUM_NEXT_119]] = add nsw i32 [[VAL_119]], [[SUM119]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_119:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 120
 ; CHECK-NEXT:    [[ARRAYIDX_120:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_119]]
 ; CHECK-NEXT:    [[VAL_120:%.*]] = load i32, ptr [[ARRAYIDX_120]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_120:%.*]] = add nsw i32 [[VAL_120]], [[SUM_NEXT_119]]
+; CHECK-NEXT:    [[SUM_NEXT_120]] = add nsw i32 [[VAL_120]], [[SUM120]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_120:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 121
 ; CHECK-NEXT:    [[ARRAYIDX_121:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_120]]
 ; CHECK-NEXT:    [[VAL_121:%.*]] = load i32, ptr [[ARRAYIDX_121]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_121:%.*]] = add nsw i32 [[VAL_121]], [[SUM_NEXT_120]]
+; CHECK-NEXT:    [[SUM_NEXT_121]] = add nsw i32 [[VAL_121]], [[SUM121]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_121:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 122
 ; CHECK-NEXT:    [[ARRAYIDX_122:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_121]]
 ; CHECK-NEXT:    [[VAL_122:%.*]] = load i32, ptr [[ARRAYIDX_122]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_122:%.*]] = add nsw i32 [[VAL_122]], [[SUM_NEXT_121]]
+; CHECK-NEXT:    [[SUM_NEXT_122]] = add nsw i32 [[VAL_122]], [[SUM122]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_122:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 123
 ; CHECK-NEXT:    [[ARRAYIDX_123:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_122]]
 ; CHECK-NEXT:    [[VAL_123:%.*]] = load i32, ptr [[ARRAYIDX_123]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_123:%.*]] = add nsw i32 [[VAL_123]], [[SUM_NEXT_122]]
+; CHECK-NEXT:    [[SUM_NEXT_123]] = add nsw i32 [[VAL_123]], [[SUM123]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_123:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 124
 ; CHECK-NEXT:    [[ARRAYIDX_124:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_123]]
 ; CHECK-NEXT:    [[VAL_124:%.*]] = load i32, ptr [[ARRAYIDX_124]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_124:%.*]] = add nsw i32 [[VAL_124]], [[SUM_NEXT_123]]
+; CHECK-NEXT:    [[SUM_NEXT_124]] = add nsw i32 [[VAL_124]], [[SUM124]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_124:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 125
 ; CHECK-NEXT:    [[ARRAYIDX_125:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_124]]
 ; CHECK-NEXT:    [[VAL_125:%.*]] = load i32, ptr [[ARRAYIDX_125]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_125:%.*]] = add nsw i32 [[VAL_125]], [[SUM_NEXT_124]]
+; CHECK-NEXT:    [[SUM_NEXT_125]] = add nsw i32 [[VAL_125]], [[SUM125]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_125:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 126
 ; CHECK-NEXT:    [[ARRAYIDX_126:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_125]]
 ; CHECK-NEXT:    [[VAL_126:%.*]] = load i32, ptr [[ARRAYIDX_126]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_126:%.*]] = add nsw i32 [[VAL_126]], [[SUM_NEXT_125]]
+; CHECK-NEXT:    [[SUM_NEXT_126]] = add nsw i32 [[VAL_126]], [[SUM126]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_126:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 127
 ; CHECK-NEXT:    [[ARRAYIDX_127:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_126]]
 ; CHECK-NEXT:    [[VAL_127:%.*]] = load i32, ptr [[ARRAYIDX_127]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_127]] = add nsw i32 [[VAL_127]], [[SUM_NEXT_126]]
+; CHECK-NEXT:    [[SUM_NEXT_127]] = add nsw i32 [[VAL_127]], [[SUM127]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_127]] = add nuw nsw i64 [[INDVARS_IV]], 128
 ; CHECK-NEXT:    [[EXITCOND_NOT_127:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_127]], 8192
+; CHECK-NEXT:    [[SUM_RED254:%.*]] = add nsw i32 [[SUM_NEXT]], [[SUM_NEXT_1]]
+; CHECK-NEXT:    [[SUM_RED255:%.*]] = add nsw i32 [[SUM_RED254]], [[SUM_NEXT_2]]
+; CHECK-NEXT:    [[SUM_RED256:%.*]] = add nsw i32 [[SUM_RED255]], [[SUM_NEXT_3]]
+; CHECK-NEXT:    [[SUM_RED257:%.*]] = add nsw i32 [[SUM_RED256]], [[SUM_NEXT_4]]
+; CHECK-NEXT:    [[SUM_RED258:%.*]] = add nsw i32 [[SUM_RED257]], [[SUM_NEXT_5]]
+; CHECK-NEXT:    [[SUM_RED259:%.*]] = add nsw i32 [[SUM_RED258]], [[SUM_NEXT_6]]
+; CHECK-NEXT:    [[SUM_RED260:%.*]] = add nsw i32 [[SUM_RED259]], [[SUM_NEXT_7]]
+; CHECK-NEXT:    [[SUM_RED261:%.*]] = add nsw i32 [[SUM_RED260]], [[SUM_NEXT_8]]
+; CHECK-NEXT:    [[SUM_RED262:%.*]] = add nsw i32 [[SUM_RED261]], [[SUM_NEXT_9]]
+; CHECK-NEXT:    [[SUM_RED263:%.*]] = add nsw i32 [[SUM_RED262]], [[SUM_NEXT_10]]
+; CHECK-NEXT:    [[SUM_RED264:%.*]] = add nsw i32 [[SUM_RED263]], [[SUM_NEXT_11]]
+; CHECK-NEXT:    [[SUM_RED265:%.*]] = add nsw i32 [[SUM_RED264]], [[SUM_NEXT_12]]
+; CHECK-NEXT:    [[SUM_RED266:%.*]] = add nsw i32 [[SUM_RED265]], [[SUM_NEXT_13]]
+; CHECK-NEXT:    [[SUM_RED267:%.*]] = add nsw i32 [[SUM_RED266]], [[SUM_NEXT_14]]
+; CHECK-NEXT:    [[SUM_RED268:%.*]] = add nsw i32 [[SUM_RED267]], [[SUM_NEXT_15]]
+; CHECK-NEXT:    [[SUM_RED269:%.*]] = add nsw i32 [[SUM_RED268]], [[SUM_NEXT_16]]
+; CHECK-NEXT:    [[SUM_RED270:%.*]] = add nsw i32 [[SUM_RED269]], [[SUM_NEXT_17]]
+; CHECK-NEXT:    [[SUM_RED271:%.*]] = add nsw i32 [[SUM_RED270]], [[SUM_NEXT_18]]
+; CHECK-NEXT:    [[SUM_RED272:%.*]] = add nsw i32 [[SUM_RED271]], [[SUM_NEXT_19]]
+; CHECK-NEXT:    [[SUM_RED273:%.*]] = add nsw i32 [[SUM_RED272]], [[SUM_NEXT_20]]
+; CHECK-NEXT:    [[SUM_RED274:%.*]] = add nsw i32 [[SUM_RED273]], [[SUM_NEXT_21]]
+; CHECK-NEXT:    [[SUM_RED275:%.*]] = add nsw i32 [[SUM_RED274]], [[SUM_NEXT_22]]
+; CHECK-NEXT:    [[SUM_RED276:%.*]] = add nsw i32 [[SUM_RED275]], [[SUM_NEXT_23]]
+; CHECK-NEXT:    [[SUM_RED277:%.*]] = add nsw i32 [[SUM_RED276]], [[SUM_NEXT_24]]
+; CHECK-NEXT:    [[SUM_RED278:%.*]] = add nsw i32 [[SUM_RED277]], [[SUM_NEXT_25]]
+; CHECK-NEXT:    [[SUM_RED279:%.*]] = add nsw i32 [[SUM_RED278]], [[SUM_NEXT_26]]
+; CHECK-NEXT:    [[SUM_RED280:%.*]] = add nsw i32 [[SUM_RED279]], [[SUM_NEXT_27]]
+; CHECK-NEXT:    [[SUM_RED281:%.*]] = add nsw i32 [[SUM_RED280]], [[SUM_NEXT_28]]
+; CHECK-NEXT:    [[SUM_RED282:%.*]] = add nsw i32 [[SUM_RED281]], [[SUM_NEXT_29]]
+; CHECK-NEXT:    [[SUM_RED283:%.*]] = add nsw i32 [[SUM_RED282]], [[SUM_NEXT_30]]
+; CHECK-NEXT:    [[SUM_RED284:%.*]] = add nsw i32 [[SUM_RED283]], [[SUM_NEXT_31]]
+; CHECK-NEXT:    [[SUM_RED285:%.*]] = add nsw i32 [[SUM_RED284]], [[SUM_NEXT_32]]
+; CHECK-NEXT:    [[SUM_RED286:%.*]] = add nsw i32 [[SUM_RED285]], [[SUM_NEXT_33]]
+; CHECK-NEXT:    [[SUM_RED287:%.*]] = add nsw i32 [[SUM_RED286]], [[SUM_NEXT_34]]
+; CHECK-NEXT:    [[SUM_RED288:%.*]] = add nsw i32 [[SUM_RED287]], [[SUM_NEXT_35]]
+; CHECK-NEXT:    [[SUM_RED289:%.*]] = add nsw i32 [[SUM_RED288]], [[SUM_NEXT_36]]
+; CHECK-NEXT:    [[SUM_RED290:%.*]] = add nsw i32 [[SUM_RED289]], [[SUM_NEXT_37]]
+; CHECK-NEXT:    [[SUM_RED291:%.*]] = add nsw i32 [[SUM_RED290]], [[SUM_NEXT_38]]
+; CHECK-NEXT:    [[SUM_RED292:%.*]] = add nsw i32 [[SUM_RED291]], [[SUM_NEXT_39]]
+; CHECK-NEXT:    [[SUM_RED293:%.*]] = add nsw i32 [[SUM_RED292]], [[SUM_NEXT_40]]
+; CHECK-NEXT:    [[SUM_RED294:%.*]] = add nsw i32 [[SUM_RED293]], [[SUM_NEXT_41]]
+; CHECK-NEXT:    [[SUM_RED295:%.*]] = add nsw i32 [[SUM_RED294]], [[SUM_NEXT_42]]
+; CHECK-NEXT:    [[SUM_RED296:%.*]] = add nsw i32 [[SUM_RED295]], [[SUM_NEXT_43]]
+; CHECK-NEXT:    [[SUM_RED297:%.*]] = add nsw i32 [[SUM_RED296]], [[SUM_NEXT_44]]
+; CHECK-NEXT:    [[SUM_RED298:%.*]] = add nsw i32 [[SUM_RED297]], [[SUM_NEXT_45]]
+; CHECK-NEXT:    [[SUM_RED299:%.*]] = add nsw i32 [[SUM_RED298]], [[SUM_NEXT_46]]
+; CHECK-NEXT:    [[SUM_RED300:%.*]] = add nsw i32 [[SUM_RED299]], [[SUM_NEXT_47]]
+; CHECK-NEXT:    [[SUM_RED301:%.*]] = add nsw i32 [[SUM_RED300]], [[SUM_NEXT_48]]
+; CHECK-NEXT:    [[SUM_RED302:%.*]] = add nsw i32 [[SUM_RED301]], [[SUM_NEXT_49]]
+; CHECK-NEXT:    [[SUM_RED303:%.*]] = add nsw i32 [[SUM_RED302]], [[SUM_NEXT_50]]
+; CHECK-NEXT:    [[SUM_RED304:%.*]] = add nsw i32 [[SUM_RED303]], [[SUM_NEXT_51]]
+; CHECK-NEXT:    [[SUM_RED305:%.*]] = add nsw i32 [[SUM_RED304]], [[SUM_NEXT_52]]
+; CHECK-NEXT:    [[SUM_RED306:%.*]] = add nsw i32 [[SUM_RED305]], [[SUM_NEXT_53]]
+; CHECK-NEXT:    [[SUM_RED307:%.*]] = add nsw i32 [[SUM_RED306]], [[SUM_NEXT_54]]
+; CHECK-NEXT:    [[SUM_RED308:%.*]] = add nsw i32 [[SUM_RED307]], [[SUM_NEXT_55]]
+; CHECK-NEXT:    [[SUM_RED309:%.*]] = add nsw i32 [[SUM_RED308]], [[SUM_NEXT_56]]
+; CHECK-NEXT:    [[SUM_RED310:%.*]] = add nsw i32 [[SUM_RED309]], [[SUM_NEXT_57]]
+; CHECK-NEXT:    [[SUM_RED311:%.*]] = add nsw i32 [[SUM_RED310]], [[SUM_NEXT_58]]
+; CHECK-NEXT:    [[SUM_RED312:%.*]] = add nsw i32 [[SUM_RED311]], [[SUM_NEXT_59]]
+; CHECK-NEXT:    [[SUM_RED313:%.*]] = add nsw i32 [[SUM_RED312]], [[SUM_NEXT_60]]
+; CHECK-NEXT:    [[SUM_RED314:%.*]] = add nsw i32 [[SUM_RED313]], [[SUM_NEXT_61]]
+; CHECK-NEXT:    [[SUM_RED315:%.*]] = add nsw i32 [[SUM_RED314]], [[SUM_NEXT_62]]
+; CHECK-NEXT:    [[SUM_RED316:%.*]] = add nsw i32 [[SUM_RED315]], [[SUM_NEXT_63]]
+; CHECK-NEXT:    [[SUM_RED317:%.*]] = add nsw i32 [[SUM_RED316]], [[SUM_NEXT_64]]
+; CHECK-NEXT:    [[SUM_RED318:%.*]] = add nsw i32 [[SUM_RED317]], [[SUM_NEXT_65]]
+; CHECK-NEXT:    [[SUM_RED319:%.*]] = add nsw i32 [[SUM_RED318]], [[SUM_NEXT_66]]
+; CHECK-NEXT:    [[SUM_RED320:%.*]] = add nsw i32 [[SUM_RED319]], [[SUM_NEXT_67]]
+; CHECK-NEXT:    [[SUM_RED321:%.*]] = add nsw i32 [[SUM_RED320]], [[SUM_NEXT_68]]
+; CHECK-NEXT:    [[SUM_RED322:%.*]] = add nsw i32 [[SUM_RED321]], [[SUM_NEXT_69]]
+; CHECK-NEXT:    [[SUM_RED323:%.*]] = add nsw i32 [[SUM_RED322]], [[SUM_NEXT_70]]
+; CHECK-NEXT:    [[SUM_RED324:%.*]] = add nsw i32 [[SUM_RED323]], [[SUM_NEXT_71]]
+; CHECK-NEXT:    [[SUM_RED325:%.*]] = add nsw i32 [[SUM_RED324]], [[SUM_NEXT_72]]
+; CHECK-NEXT:    [[SUM_RED326:%.*]] = add nsw i32 [[SUM_RED325]], [[SUM_NEXT_73]]
+; CHECK-NEXT:    [[SUM_RED327:%.*]] = add nsw i32 [[SUM_RED326]], [[SUM_NEXT_74]]
+; CHECK-NEXT:    [[SUM_RED328:%.*]] = add nsw i32 [[SUM_RED327]], [[SUM_NEXT_75]]
+; CHECK-NEXT:    [[SUM_RED329:%.*]] = add nsw i32 [[SUM_RED328]], [[SUM_NEXT_76]]
+; CHECK-NEXT:    [[SUM_RED330:%.*]] = add nsw i32 [[SUM_RED329]], [[SUM_NEXT_77]]
+; CHECK-NEXT:    [[SUM_RED331:%.*]] = add nsw i32 [[SUM_RED330]], [[SUM_NEXT_78]]
+; CHECK-NEXT:    [[SUM_RED332:%.*]] = add nsw i32 [[SUM_RED331]], [[SUM_NEXT_79]]
+; CHECK-NEXT:    [[SUM_RED333:%.*]] = add nsw i32 [[SUM_RED332]], [[SUM_NEXT_80]]
+; CHECK-NEXT:    [[SUM_RED334:%.*]] = add nsw i32 [[SUM_RED333]], [[SUM_NEXT_81]]
+; CHECK-NEXT:    [[SUM_RED335:%.*]] = add nsw i32 [[SUM_RED334]], [[SUM_NEXT_82]]
+; CHECK-NEXT:    [[SUM_RED336:%.*]] = add nsw i32 [[SUM_RED335]], [[SUM_NEXT_83]]
+; CHECK-NEXT:    [[SUM_RED337:%.*]] = add nsw i32 [[SUM_RED336]], [[SUM_NEXT_84]]
+; CHECK-NEXT:    [[SUM_RED338:%.*]] = add nsw i32 [[SUM_RED337]], [[SUM_NEXT_85]]
+; CHECK-NEXT:    [[SUM_RED339:%.*]] = add nsw i32 [[SUM_RED338]], [[SUM_NEXT_86]]
+; CHECK-NEXT:    [[SUM_RED340:%.*]] = add nsw i32 [[SUM_RED339]], [[SUM_NEXT_87]]
+; CHECK-NEXT:    [[SUM_RED341:%.*]] = add nsw i32 [[SUM_RED340]], [[SUM_NEXT_88]]
+; CHECK-NEXT:    [[SUM_RED342:%.*]] = add nsw i32 [[SUM_RED341]], [[SUM_NEXT_89]]
+; CHECK-NEXT:    [[SUM_RED343:%.*]] = add nsw i32 [[SUM_RED342]], [[SUM_NEXT_90]]
+; CHECK-NEXT:    [[SUM_RED344:%.*]] = add nsw i32 [[SUM_RED343]], [[SUM_NEXT_91]]
+; CHECK-NEXT:    [[SUM_RED345:%.*]] = add nsw i32 [[SUM_RED344]], [[SUM_NEXT_92]]
+; CHECK-NEXT:    [[SUM_RED346:%.*]] = add nsw i32 [[SUM_RED345]], [[SUM_NEXT_93]]
+; CHECK-NEXT:    [[SUM_RED347:%.*]] = add nsw i32 [[SUM_RED346]], [[SUM_NEXT_94]]
+; CHECK-NEXT:    [[SUM_RED348:%.*]] = add nsw i32 [[SUM_RED347]], [[SUM_NEXT_95]]
+; CHECK-NEXT:    [[SUM_RED349:%.*]] = add nsw i32 [[SUM_RED348]], [[SUM_NEXT_96]]
+; CHECK-NEXT:    [[SUM_RED350:%.*]] = add nsw i32 [[SUM_RED349]], [[SUM_NEXT_97]]
+; CHECK-NEXT:    [[SUM_RED351:%.*]] = add nsw i32 [[SUM_RED350]], [[SUM_NEXT_98]]
+; CHECK-NEXT:    [[SUM_RED352:%.*]] = add nsw i32 [[SUM_RED351]], [[SUM_NEXT_99]]
+; CHECK-NEXT:    [[SUM_RED353:%.*]] = add nsw i32 [[SUM_RED352]], [[SUM_NEXT_100]]
+; CHECK-NEXT:    [[SUM_RED354:%.*]] = add nsw i32 [[SUM_RED353]], [[SUM_NEXT_101]]
+; CHECK-NEXT:    [[SUM_RED355:%.*]] = add nsw i32 [[SUM_RED354]], [[SUM_NEXT_102]]
+; CHECK-NEXT:    [[SUM_RED356:%.*]] = add nsw i32 [[SUM_RED355]], [[SUM_NEXT_103]]
+; CHECK-NEXT:    [[SUM_RED357:%.*]] = add nsw i32 [[SUM_RED356]], [[SUM_NEXT_104]]
+; CHECK-NEXT:    [[SUM_RED358:%.*]] = add nsw i32 [[SUM_RED357]], [[SUM_NEXT_105]]
+; CHECK-NEXT:    [[SUM_RED359:%.*]] = add nsw i32 [[SUM_RED358]], [[SUM_NEXT_106]]
+; CHECK-NEXT:    [[SUM_RED360:%.*]] = add nsw i32 [[SUM_RED359]], [[SUM_NEXT_107]]
+; CHECK-NEXT:    [[SUM_RED361:%.*]] = add nsw i32 [[SUM_RED360]], [[SUM_NEXT_108]]
+; CHECK-NEXT:    [[SUM_RED362:%.*]] = add nsw i32 [[SUM_RED361]], [[SUM_NEXT_109]]
+; CHECK-NEXT:    [[SUM_RED363:%.*]] = add nsw i32 [[SUM_RED362]], [[SUM_NEXT_110]]
+; CHECK-NEXT:    [[SUM_RED364:%.*]] = add nsw i32 [[SUM_RED363]], [[SUM_NEXT_111]]
+; CHECK-NEXT:    [[SUM_RED365:%.*]] = add nsw i32 [[SUM_RED364]], [[SUM_NEXT_112]]
+; CHECK-NEXT:    [[SUM_RED366:%.*]] = add nsw i32 [[SUM_RED365]], [[SUM_NEXT_113]]
+; CHECK-NEXT:    [[SUM_RED367:%.*]] = add nsw i32 [[SUM_RED366]], [[SUM_NEXT_114]]
+; CHECK-NEXT:    [[SUM_RED368:%.*]] = add nsw i32 [[SUM_RED367]], [[SUM_NEXT_115]]
+; CHECK-NEXT:    [[SUM_RED369:%.*]] = add nsw i32 [[SUM_RED368]], [[SUM_NEXT_116]]
+; CHECK-NEXT:    [[SUM_RED370:%.*]] = add nsw i32 [[SUM_RED369]], [[SUM_NEXT_117]]
+; CHECK-NEXT:    [[SUM_RED371:%.*]] = add nsw i32 [[SUM_RED370]], [[SUM_NEXT_118]]
+; CHECK-NEXT:    [[SUM_RED372:%.*]] = add nsw i32 [[SUM_RED371]], [[SUM_NEXT_119]]
+; CHECK-NEXT:    [[SUM_RED373:%.*]] = add nsw i32 [[SUM_RED372]], [[SUM_NEXT_120]]
+; CHECK-NEXT:    [[SUM_RED374:%.*]] = add nsw i32 [[SUM_RED373]], [[SUM_NEXT_121]]
+; CHECK-NEXT:    [[SUM_RED375:%.*]] = add nsw i32 [[SUM_RED374]], [[SUM_NEXT_122]]
+; CHECK-NEXT:    [[SUM_RED376:%.*]] = add nsw i32 [[SUM_RED375]], [[SUM_NEXT_123]]
+; CHECK-NEXT:    [[SUM_RED377:%.*]] = add nsw i32 [[SUM_RED376]], [[SUM_NEXT_124]]
+; CHECK-NEXT:    [[SUM_RED378:%.*]] = add nsw i32 [[SUM_RED377]], [[SUM_NEXT_125]]
+; CHECK-NEXT:    [[SUM_RED379:%.*]] = add nsw i32 [[SUM_RED378]], [[SUM_NEXT_126]]
+; CHECK-NEXT:    [[SUM_RED380:%.*]] = add nsw i32 [[SUM_RED379]], [[SUM_NEXT_127]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT_127]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_127]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_RED380]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_NEXT_LCSSA]]
 ;
 entry:
@@ -559,7 +813,14 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM2:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM3:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM4:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM5:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM6:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM7:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM8:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -613,7 +874,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48:%.*]] = mul i32 [[DUMMY47]], [[DUMMY47]]
 ; CHECK-NEXT:    [[DUMMY49:%.*]] = mul i32 [[DUMMY48]], [[DUMMY48]]
 ; CHECK-NEXT:    [[DUMMY50:%.*]] = mul i32 [[DUMMY49]], [[DUMMY49]]
-; CHECK-NEXT:    [[SUM_NEXT:%.*]] = add nsw i32 [[DUMMY50]], [[SUM]]
+; CHECK-NEXT:    [[SUM_NEXT]] = add nsw i32 [[DUMMY50]], [[SUM]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
@@ -667,7 +928,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_1:%.*]] = mul i32 [[DUMMY47_1]], [[DUMMY47_1]]
 ; CHECK-NEXT:    [[DUMMY49_1:%.*]] = mul i32 [[DUMMY48_1]], [[DUMMY48_1]]
 ; CHECK-NEXT:    [[DUMMY50_1:%.*]] = mul i32 [[DUMMY49_1]], [[DUMMY49_1]]
-; CHECK-NEXT:    [[SUM_NEXT_1:%.*]] = add nsw i32 [[DUMMY50_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[SUM_NEXT_1]] = add nsw i32 [[DUMMY50_1]], [[SUM2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
@@ -721,7 +982,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_2:%.*]] = mul i32 [[DUMMY47_2]], [[DUMMY47_2]]
 ; CHECK-NEXT:    [[DUMMY49_2:%.*]] = mul i32 [[DUMMY48_2]], [[DUMMY48_2]]
 ; CHECK-NEXT:    [[DUMMY50_2:%.*]] = mul i32 [[DUMMY49_2]], [[DUMMY49_2]]
-; CHECK-NEXT:    [[SUM_NEXT_2:%.*]] = add nsw i32 [[DUMMY50_2]], [[SUM_NEXT_1]]
+; CHECK-NEXT:    [[SUM_NEXT_2]] = add nsw i32 [[DUMMY50_2]], [[SUM3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
@@ -775,7 +1036,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_3:%.*]] = mul i32 [[DUMMY47_3]], [[DUMMY47_3]]
 ; CHECK-NEXT:    [[DUMMY49_3:%.*]] = mul i32 [[DUMMY48_3]], [[DUMMY48_3]]
 ; CHECK-NEXT:    [[DUMMY50_3:%.*]] = mul i32 [[DUMMY49_3]], [[DUMMY49_3]]
-; CHECK-NEXT:    [[SUM_NEXT_3:%.*]] = add nsw i32 [[DUMMY50_3]], [[SUM_NEXT_2]]
+; CHECK-NEXT:    [[SUM_NEXT_3]] = add nsw i32 [[DUMMY50_3]], [[SUM4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
@@ -829,7 +1090,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_4:%.*]] = mul i32 [[DUMMY47_4]], [[DUMMY47_4]]
 ; CHECK-NEXT:    [[DUMMY49_4:%.*]] = mul i32 [[DUMMY48_4]], [[DUMMY48_4]]
 ; CHECK-NEXT:    [[DUMMY50_4:%.*]] = mul i32 [[DUMMY49_4]], [[DUMMY49_4]]
-; CHECK-NEXT:    [[SUM_NEXT_4:%.*]] = add nsw i32 [[DUMMY50_4]], [[SUM_NEXT_3]]
+; CHECK-NEXT:    [[SUM_NEXT_4]] = add nsw i32 [[DUMMY50_4]], [[SUM5]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
 ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
@@ -883,7 +1144,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_5:%.*]] = mul i32 [[DUMMY47_5]], [[DUMMY47_5]]
 ; CHECK-NEXT:    [[DUMMY49_5:%.*]] = mul i32 [[DUMMY48_5]], [[DUMMY48_5]]
 ; CHECK-NEXT:    [[DUMMY50_5:%.*]] = mul i32 [[DUMMY49_5]], [[DUMMY49_5]]
-; CHECK-NEXT:    [[SUM_NEXT_5:%.*]] = add nsw i32 [[DUMMY50_5]], [[SUM_NEXT_4]]
+; CHECK-NEXT:    [[SUM_NEXT_5]] = add nsw i32 [[DUMMY50_5]], [[SUM6]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
 ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
@@ -937,7 +1198,7 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_6:%.*]] = mul i32 [[DUMMY47_6]], [[DUMMY47_6]]
 ; CHECK-NEXT:    [[DUMMY49_6:%.*]] = mul i32 [[DUMMY48_6]], [[DUMMY48_6]]
 ; CHECK-NEXT:    [[DUMMY50_6:%.*]] = mul i32 [[DUMMY49_6]], [[DUMMY49_6]]
-; CHECK-NEXT:    [[SUM_NEXT_6:%.*]] = add nsw i32 [[DUMMY50_6]], [[SUM_NEXT_5]]
+; CHECK-NEXT:    [[SUM_NEXT_6]] = add nsw i32 [[DUMMY50_6]], [[SUM7]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
 ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
 ; CHECK-NEXT:    [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
@@ -991,15 +1252,22 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_7:%.*]] = mul i32 [[DUMMY47_7]], [[DUMMY47_7]]
 ; CHECK-NEXT:    [[DUMMY49_7:%.*]] = mul i32 [[DUMMY48_7]], [[DUMMY48_7]]
 ; CHECK-NEXT:    [[DUMMY50_7:%.*]] = mul i32 [[DUMMY49_7]], [[DUMMY49_7]]
-; CHECK-NEXT:    [[SUM_NEXT_7]] = add nsw i32 [[DUMMY50_7]], [[SUM_NEXT_6]]
+; CHECK-NEXT:    [[SUM_NEXT_7]] = add nsw i32 [[DUMMY50_7]], [[SUM8]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT:    [[SUM_RED15:%.*]] = add nsw i32 [[SUM_NEXT]], [[SUM_NEXT_1]]
+; CHECK-NEXT:    [[SUM_RED16:%.*]] = add nsw i32 [[SUM_RED15]], [[SUM_NEXT_2]]
+; CHECK-NEXT:    [[SUM_RED17:%.*]] = add nsw i32 [[SUM_RED16]], [[SUM_NEXT_3]]
+; CHECK-NEXT:    [[SUM_RED18:%.*]] = add nsw i32 [[SUM_RED17]], [[SUM_NEXT_4]]
+; CHECK-NEXT:    [[SUM_RED19:%.*]] = add nsw i32 [[SUM_RED18]], [[SUM_NEXT_5]]
+; CHECK-NEXT:    [[SUM_RED20:%.*]] = add nsw i32 [[SUM_RED19]], [[SUM_NEXT_6]]
+; CHECK-NEXT:    [[SUM_RED21:%.*]] = add nsw i32 [[SUM_RED20]], [[SUM_NEXT_7]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_RED21]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_RED21]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_UNR_LCSSA]]
 ; CHECK:       for.cond.cleanup.unr-lcssa:
 ; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_NEXT_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index b44206a044c291..be9db67f3c9aa4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -75,31 +75,37 @@ define i3 @test(ptr %a, i3 %n) {
 ; UNROLL-4-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-4:       for.body:
 ; UNROLL-4-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT:    [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[SUM_024:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_1:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[SUM_025:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_2:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[SUM_026:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i3, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; UNROLL-4-NEXT:    [[TMP2:%.*]] = load i3, ptr [[ARRAYIDX]], align 1
-; UNROLL-4-NEXT:    [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
+; UNROLL-4-NEXT:    [[ADD]] = add nsw i3 [[TMP2]], [[SUM_02]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-4-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
 ; UNROLL-4-NEXT:    [[TMP3:%.*]] = load i3, ptr [[ARRAYIDX_1]], align 1
-; UNROLL-4-NEXT:    [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
+; UNROLL-4-NEXT:    [[ADD_1]] = add nsw i3 [[TMP3]], [[SUM_024]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; UNROLL-4-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
 ; UNROLL-4-NEXT:    [[TMP4:%.*]] = load i3, ptr [[ARRAYIDX_2]], align 1
-; UNROLL-4-NEXT:    [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
+; UNROLL-4-NEXT:    [[ADD_2]] = add nsw i3 [[TMP4]], [[SUM_025]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; UNROLL-4-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
 ; UNROLL-4-NEXT:    [[TMP5:%.*]] = load i3, ptr [[ARRAYIDX_3]], align 1
-; UNROLL-4-NEXT:    [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
+; UNROLL-4-NEXT:    [[ADD_3]] = add nsw i3 [[TMP5]], [[SUM_026]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; UNROLL-4-NEXT:    [[NITER_NEXT_3]] = add i3 [[NITER]], -4
 ; UNROLL-4-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
+; UNROLL-4-NEXT:    [[SUM_02_RED9:%.*]] = add nsw i3 [[ADD]], [[ADD_1]]
+; UNROLL-4-NEXT:    [[SUM_02_RED10:%.*]] = add nsw i3 [[SUM_02_RED9]], [[ADD_2]]
+; UNROLL-4-NEXT:    [[SUM_02_RED11:%.*]] = add nsw i3 [[SUM_02_RED10]], [[ADD_3]]
 ; UNROLL-4-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL-4:       for.end.loopexit.unr-lcssa.loopexit:
-; UNROLL-4-NEXT:    [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[SUM_02_RED11]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[SUM_02_RED11]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
 ; UNROLL-4:       for.end.loopexit.unr-lcssa:
 ; UNROLL-4-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i3 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index fea9df610c3e82..ab38fa84efc7f7 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -16,11 +16,14 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[C_010_RED7:%.*]] = add nsw i32 [[ADD:%.*]], [[ADD_1:%.*]]
+; CHECK-NEXT:    [[C_010_RED8:%.*]] = add nsw i32 [[C_010_RED7]], [[ADD_2:%.*]]
+; CHECK-NEXT:    [[C_010_RED9:%.*]] = add nsw i32 [[C_010_RED8]], [[ADD_3:%.*]]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
 ; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[C_010_RED9]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[C_010_RED9]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
 ; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER:%.*]]
 ; CHECK:       for.body.epil.preheader:
@@ -64,35 +67,38 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    ret i32 [[C_0_LCSSA]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_0102:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_1]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_0103:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_0104:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[C_010]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
+; CHECK-NEXT:    [[ADD_1]] = add nsw i32 [[MUL_1]], [[C_0102]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2_2]], align 4
 ; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]]
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
+; CHECK-NEXT:    [[ADD_2]] = add nsw i32 [[MUL_2]], [[C_0103]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX2_3]], align 4
 ; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP14]], [[TMP13]]
-; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
+; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[C_0104]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
diff --git a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
index 2d4dd76a6cab37..a1ae26ece595b7 100644
--- a/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/simplify-reductions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=loop-unroll -unroll-count=2 < %s | FileCheck %s
+; RUN: opt -S -passes=loop-unroll -unroll-count=2 -unroll-simplify-reductions=true < %s | FileCheck %s
 
 ; The loops below are variations of:
 ;   double sum = 0;
@@ -14,19 +14,21 @@ define double @p1(ptr %A) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM1:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
 ; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
-; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    [[SUM_NEXT]] = fadd fast double [[A_VAL]], [[SUM]]
 ; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
 ; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM1]]
 ; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    [[SUM_RED:%.*]] = fadd fast double [[SUM_NEXT]], [[SUM_NEXT_1]]
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_RED]], [[LOOP]] ]
 ; CHECK-NEXT:    ret double [[SUM_LCSSA]]
 ;
 entry:
@@ -54,19 +56,21 @@ define double @p2(ptr %A, double %acc) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi double [ [[ACC:%.*]], [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM1:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[SUM_NEXT_1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[I]]
 ; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A_GEP]], align 8
-; CHECK-NEXT:    [[SUM_NEXT:%.*]] = fadd fast double [[A_VAL]], [[SUM]]
+; CHECK-NEXT:    [[SUM_NEXT]] = fadd fast double [[A_VAL]], [[SUM]]
 ; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_NEXT]]
 ; CHECK-NEXT:    [[A_VAL_1:%.*]] = load double, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[SUM_NEXT_1]] = fadd fast double [[A_VAL_1]], [[SUM1]]
 ; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    [[SUM_RED:%.*]] = fadd fast double [[SUM_NEXT]], [[SUM_NEXT_1]]
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi double [ [[SUM_RED]], [[LOOP]] ]
 ; CHECK-NEXT:    ret double [[SUM_LCSSA]]
 ;
 entry:
@@ -94,19 +98,21 @@ define i64 @p3(ptr %A) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT_1:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[AND:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[AND1:%.*]] = phi i64 [ -1, [[ENTRY]] ], [ [[AND_NEXT_1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[I]]
 ; CHECK-NEXT:    [[A_VAL:%.*]] = load i64, ptr [[A_GEP]], align 8
-; CHECK-NEXT:    [[AND_NEXT:%.*]] = and i64 [[A_VAL]], [[AND]]
+; CHECK-NEXT:    [[AND_NEXT]] = and i64 [[A_VAL]], [[AND]]
 ; CHECK-NEXT:    [[I_NEXT:%.*]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_NEXT]]
 ; CHECK-NEXT:    [[A_VAL_1:%.*]] = load i64, ptr [[A_GEP_1]], align 8
-; CHECK-NEXT:    [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND_NEXT]]
+; CHECK-NEXT:    [[AND_NEXT_1]] = and i64 [[A_VAL_1]], [[AND1]]
 ; CHECK-NEXT:    [[I_NEXT_1]] = add nuw nsw i64 [[I]], 2
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[I_NEXT_1]], 1000
+; CHECK-NEXT:    [[AND_RED:%.*]] = and i64 [[AND_NEXT]], [[AND_NEXT_1]]
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[AND_LCSSA:%.*]] = phi i64 [ [[AND_NEXT_1]], [[LOOP]] ]
+; CHECK-NEXT:    [[AND_LCSSA:%.*]] = phi i64 [ [[AND_RED]], [[LOOP]] ]
 ; CHECK-NEXT:    ret i64 [[AND_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
index 5fe267d62f9333..4b83483f214da1 100644
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -18,129 +18,182 @@ define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
 ; CHECK:       for.body4:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ]
-; CHECK-NEXT:    [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1121:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_111:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1122:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_218:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1123:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_3:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1124:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_4:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1125:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_5:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1126:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_6:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[SUM_1127:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
 ; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], [[SUM_11]]
+; CHECK-NEXT:    [[ADD]] = add i32 [[TMP0]], [[SUM_11]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_NEG:%.*]] = xor i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_NEG]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
+; CHECK-NEXT:    [[ADD_111]] = add i32 [[TMP1]], [[SUM_1121]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_112_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_112_NEG]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
+; CHECK-NEXT:    [[ADD_218]] = add i32 [[TMP2]], [[SUM_1122]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_219_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_219_NEG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
+; CHECK-NEXT:    [[ADD_3]] = add i32 [[TMP3]], [[SUM_1123]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_3_NEG]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
+; CHECK-NEXT:    [[ADD_4]] = add i32 [[TMP4]], [[SUM_1124]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_4_NEG]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
+; CHECK-NEXT:    [[ADD_5]] = add i32 [[TMP5]], [[SUM_1125]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_5_NEG]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
+; CHECK-NEXT:    [[ADD_6]] = add i32 [[TMP6]], [[SUM_1126]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_6_NEG]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
+; CHECK-NEXT:    [[ADD_7]] = add i32 [[TMP7]], [[SUM_1127]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1_PREHEADER:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       for.body4.1.preheader:
+; CHECK-NEXT:    [[SUM_11_RED34:%.*]] = add i32 [[ADD]], [[ADD_111]]
+; CHECK-NEXT:    [[SUM_11_RED35:%.*]] = add i32 [[SUM_11_RED34]], [[ADD_218]]
+; CHECK-NEXT:    [[SUM_11_RED36:%.*]] = add i32 [[SUM_11_RED35]], [[ADD_3]]
+; CHECK-NEXT:    [[SUM_11_RED37:%.*]] = add i32 [[SUM_11_RED36]], [[ADD_4]]
+; CHECK-NEXT:    [[SUM_11_RED38:%.*]] = add i32 [[SUM_11_RED37]], [[ADD_5]]
+; CHECK-NEXT:    [[SUM_11_RED39:%.*]] = add i32 [[SUM_11_RED38]], [[ADD_6]]
+; CHECK-NEXT:    [[SUM_11_RED40:%.*]] = add i32 [[SUM_11_RED39]], [[ADD_7]]
+; CHECK-NEXT:    br label [[FOR_BODY4_1:%.*]]
 ; CHECK:       for.body4.1:
-; CHECK-NEXT:    [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
-; CHECK-NEXT:    [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[INDVARS_IV_1:%.*]] = phi i64 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_1:%.*]] = phi i32 [ [[SUM_11_RED40]], [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_148:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_1:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_149:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_2:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_150:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_3:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_151:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_4:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_152:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_5:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_153:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_6:%.*]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[SUM_11_154:%.*]] = phi i32 [ 0, [[FOR_BODY4_1_PREHEADER]] ], [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ]
 ; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1:%.*]] = shl i32 [[TMP8]], 1
+; CHECK-NEXT:    [[ADD_1]] = add i32 [[MUL_1]], [[SUM_11_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_NEG:%.*]] = xor i64 [[INDVARS_IV_1]], -1
 ; CHECK-NEXT:    [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_NEG]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1
+; CHECK-NEXT:    [[ADD_1_1]] = add i32 [[MUL_1_1]], [[SUM_11_148]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_1_NEG]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1
+; CHECK-NEXT:    [[ADD_1_2]] = add i32 [[MUL_1_2]], [[SUM_11_149]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_2_NEG]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1
+; CHECK-NEXT:    [[ADD_1_3]] = add i32 [[MUL_1_3]], [[SUM_11_150]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_3_NEG]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1
+; CHECK-NEXT:    [[ADD_1_4]] = add i32 [[MUL_1_4]], [[SUM_11_151]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_4_NEG]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1
+; CHECK-NEXT:    [[ADD_1_5]] = add i32 [[MUL_1_5]], [[SUM_11_152]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_5_NEG]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1
+; CHECK-NEXT:    [[ADD_1_6]] = add i32 [[MUL_1_6]], [[SUM_11_153]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_1_6_NEG]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 1
-; CHECK-NEXT:    [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_1_7:%.*]] = shl i32 [[TMP15]], 1
+; CHECK-NEXT:    [[ADD_1_7]] = add i32 [[MUL_1_7]], [[SUM_11_154]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
 ; CHECK-NEXT:    [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
-; CHECK-NEXT:    br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK-NEXT:    br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2_PREHEADER:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK:       for.body4.2.preheader:
+; CHECK-NEXT:    [[SUM_11_1_RED61:%.*]] = add i32 [[ADD_1]], [[ADD_1_1]]
+; CHECK-NEXT:    [[SUM_11_1_RED62:%.*]] = add i32 [[SUM_11_1_RED61]], [[ADD_1_2]]
+; CHECK-NEXT:    [[SUM_11_1_RED63:%.*]] = add i32 [[SUM_11_1_RED62]], [[ADD_1_3]]
+; CHECK-NEXT:    [[SUM_11_1_RED64:%.*]] = add i32 [[SUM_11_1_RED63]], [[ADD_1_4]]
+; CHECK-NEXT:    [[SUM_11_1_RED65:%.*]] = add i32 [[SUM_11_1_RED64]], [[ADD_1_5]]
+; CHECK-NEXT:    [[SUM_11_1_RED66:%.*]] = add i32 [[SUM_11_1_RED65]], [[ADD_1_6]]
+; CHECK-NEXT:    [[SUM_11_1_RED67:%.*]] = add i32 [[SUM_11_1_RED66]], [[ADD_1_7]]
+; CHECK-NEXT:    br label [[FOR_BODY4_2:%.*]]
 ; CHECK:       for.body4.2:
-; CHECK-NEXT:    [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
-; CHECK-NEXT:    [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
+; CHECK-NEXT:    [[INDVARS_IV_2:%.*]] = phi i64 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_2:%.*]] = phi i32 [ [[SUM_11_1_RED67]], [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_275:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_1:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_276:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_2:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_277:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_3:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_278:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_4:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_279:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_5:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_280:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_6:%.*]], [[FOR_BODY4_2]] ]
+; CHECK-NEXT:    [[SUM_11_281:%.*]] = phi i32 [ 0, [[FOR_BODY4_2_PREHEADER]] ], [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ]
 ; CHECK-NEXT:    [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP24]], 3
-; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP16]], 3
+; CHECK-NEXT:    [[ADD_2]] = add i32 [[MUL_2]], [[SUM_11_2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_NEG:%.*]] = xor i64 [[INDVARS_IV_2]], -1
 ; CHECK-NEXT:    [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_NEG]]
-; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3
-; CHECK-NEXT:    [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3
+; CHECK-NEXT:    [[ADD_2_1]] = add i32 [[MUL_2_1]], [[SUM_11_275]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_1_NEG]]
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3
-; CHECK-NEXT:    [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3
+; CHECK-NEXT:    [[ADD_2_2]] = add i32 [[MUL_2_2]], [[SUM_11_276]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_2_NEG]]
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3
-; CHECK-NEXT:    [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3
+; CHECK-NEXT:    [[ADD_2_3]] = add i32 [[MUL_2_3]], [[SUM_11_277]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_3_NEG]]
-; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3
-; CHECK-NEXT:    [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3
+; CHECK-NEXT:    [[ADD_2_4]] = add i32 [[MUL_2_4]], [[SUM_11_278]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_4_NEG]]
-; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3
-; CHECK-NEXT:    [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3
+; CHECK-NEXT:    [[ADD_2_5]] = add i32 [[MUL_2_5]], [[SUM_11_279]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_5_NEG]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3
-; CHECK-NEXT:    [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3
+; CHECK-NEXT:    [[ADD_2_6]] = add i32 [[MUL_2_6]], [[SUM_11_280]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[INDVARS_IV_NEXT_2_6_NEG]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
-; CHECK-NEXT:    [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3
+; CHECK-NEXT:    [[ADD_2_7]] = add i32 [[MUL_2_7]], [[SUM_11_281]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
 ; CHECK-NEXT:    [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
 ; CHECK-NEXT:    br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
 ; CHECK:       for.inc5.2:
-; CHECK-NEXT:    ret i32 [[ADD_2_7]]
+; CHECK-NEXT:    [[SUM_11_2_RED88:%.*]] = add i32 [[ADD_2]], [[ADD_2_1]]
+; CHECK-NEXT:    [[SUM_11_2_RED89:%.*]] = add i32 [[SUM_11_2_RED88]], [[ADD_2_2]]
+; CHECK-NEXT:    [[SUM_11_2_RED90:%.*]] = add i32 [[SUM_11_2_RED89]], [[ADD_2_3]]
+; CHECK-NEXT:    [[SUM_11_2_RED91:%.*]] = add i32 [[SUM_11_2_RED90]], [[ADD_2_4]]
+; CHECK-NEXT:    [[SUM_11_2_RED92:%.*]] = add i32 [[SUM_11_2_RED91]], [[ADD_2_5]]
+; CHECK-NEXT:    [[SUM_11_2_RED93:%.*]] = add i32 [[SUM_11_2_RED92]], [[ADD_2_6]]
+; CHECK-NEXT:    [[SUM_11_2_RED94:%.*]] = add i32 [[SUM_11_2_RED93]], [[ADD_2_7]]
+; CHECK-NEXT:    ret i32 [[SUM_11_2_RED94]]
 ;
 entry:
   call void @populate(ptr noundef @ARR)
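
(For readers skimming the regenerated CHECK lines above: the effect of the transformation is easiest to see at the source level. The C++ sketch below is a hand-written illustration of what the p1 test in simplify-reductions.ll exercises with -unroll-count=2; it is not code from the patch, and the function name and bounds are only illustrative.)

// Each unrolled copy of the body keeps its own accumulator; the partial
// sums are then reduced into the single value the exit uses (in the IR
// above the reduction is materialized in the latch and consumed by the
// lcssa phi).
double sum_unrolled(const double *A) {
  double s0 = 0.0, s1 = 0.0;
  for (long i = 0; i < 1000; i += 2) {
    s0 += A[i];        // copy 0 of the original loop body
    s1 += A[i + 1];    // copy 1 of the original loop body
  }
  return s0 + s1;      // reduction of the partial sums
}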

>From b259694086a5c79991ee1a0bcf6b6562e49c4fbf Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rj.bcjesus at gmail.com>
Date: Tue, 12 Mar 2024 09:55:14 +0000
Subject: [PATCH 3/3] [LoopUnroll] Fix some formatting issues

---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp | 31 +++++++++++-------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b14d05d642e275..e30547d8ee27b4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -84,11 +84,6 @@ STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
 STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
                                "latch (completely or otherwise)");
 
-static cl::opt<bool>
-UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
-                         cl::Hidden, cl::desc("Try to simplify reductions "
-                                              "after unrolling a loop."));
-
 static cl::opt<bool>
 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
                     cl::desc("Allow runtime unrolled loops to be unrolled "
@@ -114,6 +109,11 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
 #endif
                     );
 
+static cl::opt<bool>
+UnrollSimplifyReductions("unroll-simplify-reductions", cl::init(true),
+                         cl::Hidden, cl::desc("Try to simplify reductions "
+                                              "after unrolling a loop."));
+
 
 /// Check if unrolling created a situation where we need to insert phi nodes to
 /// preserve LCSSA form.
@@ -265,8 +265,8 @@ static bool trySimplifyReductions(Instruction &I) {
   // Attempt to construct a list of instructions that are chained together
   // (i.e. that perform a reduction).
   SmallVector<BinaryOperator *, 16> Ops;
-  for (Instruction *Cur = PN, *Next = nullptr; /* true */; Cur = Next,
-                                                           Next = nullptr) {
+  for (Instruction *Cur = PN, *Next = nullptr; /* true */;
+       Cur = Next, Next = nullptr) {
     // Try to find the next element in the reduction chain.
     for (auto *U : Cur->users()) {
       auto *Candidate = dyn_cast<Instruction>(U);
@@ -298,11 +298,8 @@ static bool trySimplifyReductions(Instruction &I) {
   if (Ops.size() < 2)
     return false;
 
-  LLVM_DEBUG(
-  dbgs() << "Found candidate reduction: " << I << "\n";
-  for (auto const *Op : Ops)
-    dbgs() << "                         | " << *Op << "\n";
-  );
+  LLVM_DEBUG(dbgs() << "Candidate reduction of length " << Ops.size()
+                    << " found at " << I << ".\n");
 
   // Ensure all instructions perform the same operation and that the operation
   // is associative and commutative so that we can break the chain apart and
@@ -407,9 +404,9 @@ static bool trySimplifyReductions(Instruction &I) {
   // Helper function to create a new binary op.
   // Note: We copy the flags from Ops[0]. Could this be too permissive?
   auto CreateBinOp = [&](Value *V1, Value *V2) {
-    auto Name = PN->getName()+".red";
-    return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0],
-                                                 Name, &BB->back());
+    auto Name = PN->getName() + ".red";
+    return BinaryOperator::CreateWithCopiedFlags(Opcode, V1, V2, Ops[0], Name,
+                                                 &BB->back());
   };
 
   // Compute the partial sums of the Ops:
@@ -420,7 +417,7 @@ static bool trySimplifyReductions(Instruction &I) {
   // so if we compute SOps in order (i.e. from 0 to N) we can reuse partial
   // results.
   SmallVector<Value *, 16> SOps(N+1);
-  SOps[0] = nullptr;  // alternatively we could use NeutralElem
+  SOps[0] = nullptr; // alternatively we could use NeutralElem
   SOps[1] = Ops.front();
   for (unsigned k = 2; k <= N; k++)
     SOps[k] = CreateBinOp(SOps[k-1], Ops[k-1]);
@@ -433,7 +430,7 @@ static bool trySimplifyReductions(Instruction &I) {
   // so if we compute SPhis in reverse (i.e. from N down to 0) we can reuse the
   // partial sums computed thus far.
   SmallVector<Value *, 16> SPhis(N+1);
-  SPhis[N] = nullptr;  // alternatively we could use NeutralElem
+  SPhis[N] = nullptr; // alternatively we could use NeutralElem
   SPhis[N-1] = Phis.back();
   for (signed k = N-2; k >= 0; k--)
     SPhis[k] = CreateBinOp(SPhis[k+1], Phis[k]);
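
(A minimal standalone sketch of the SOps/SPhis bookkeeping touched by the hunks above, using plain ints in place of llvm::Value* and ordinary addition in place of CreateBinOp so it compiles and runs on its own; the concrete values in main are made up for illustration.)

#include <cstdio>
#include <vector>

int main() {
  // Ops[i] stands for the i-th unrolled reduction op, Phis[i] for the i-th
  // accumulator phi, mirroring the names in trySimplifyReductions.
  std::vector<int> Ops  = {1, 2, 3, 4};
  std::vector<int> Phis = {10, 20, 30, 40};
  unsigned N = Ops.size();

  // SOps[k] = Ops[0] op ... op Ops[k-1], built front to back so each entry
  // reuses the previous partial result.
  std::vector<int> SOps(N + 1, 0);
  SOps[1] = Ops.front();
  for (unsigned k = 2; k <= N; ++k)
    SOps[k] = SOps[k - 1] + Ops[k - 1];

  // SPhis[k] = Phis[k] op ... op Phis[N-1], built back to front for the
  // same reason.
  std::vector<int> SPhis(N + 1, 0);
  SPhis[N - 1] = Phis.back();
  for (int k = int(N) - 2; k >= 0; --k)
    SPhis[k] = SPhis[k + 1] + Phis[k];

  std::printf("SOps[N]  = %d\n", SOps[N]);   // 1 + 2 + 3 + 4     = 10
  std::printf("SPhis[0] = %d\n", SPhis[0]);  // 10 + 20 + 30 + 40 = 100
  return 0;
}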


