[llvm] [InstCombine] Combine interleaved PHI reduction chains. (PR #143878)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 16 03:21:19 PDT 2025


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/143878

>From 11e41c6d179dcff902b8aba8a4960235e7714fdb Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 6 Jun 2025 10:30:49 -0700
Subject: [PATCH 1/3] Precommit tests.

---
 .../InstCombine/phi-reduction-chain.ll        | 1465 +++++++++++++++++
 1 file changed, 1465 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/phi-reduction-chain.ll

diff --git a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
new file mode 100644
index 0000000000000..2b26fb5c66382
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
@@ -0,0 +1,1465 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Reassociate add: two interleaved i8 add recurrences (%pn, %pn2) whose results are summed by %rdx in the exit block.
+define i8 @add_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @add_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, and maintain nuw if all ops have it (%op1, %op2 and %rdx are all `add nuw` here).
+define i8 @add_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op1 doesn't have it (%op1 is a plain add; %op2 and %rdx are `add nuw`).
+define i8 @add_op1_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op2 doesn't have it (%op2 is a plain add; %op1 and %rdx are `add nuw`).
+define i8 @add_op2_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if rdx doesn't have it (%op1 and %op2 are `add nuw`; %rdx is a plain add).
+define i8 @add_rdx_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nsw even if all ops have it (%op1, %op2 and %rdx are all `add nsw` in the input; the expected output also shows nuw on them).
+define i8 @add_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add nuw nsw i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nsw i8 %pn, 2
+  %op2 = add nsw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, keep nuw/nsw if all ops have them (%op1, %op2 and %rdx are all `add nuw nsw`).
+define i8 @add_nuw_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add nuw nsw i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw nsw i8 %pn, 2
+  %op2 = add nuw nsw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate fixed-length vector operands (<16 x i8> accumulators updated with splat constants).
+define <16 x i8> @add_v16i8(i32 %n) {
+; CHECK-LABEL: define <16 x i8> @add_v16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi <16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add <16 x i8> [[PN]], splat (i8 2)
+; CHECK-NEXT:    [[OP2]] = add <16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add <16 x i8> [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret <16 x i8> [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi <16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ]
+  %pn2 = phi <16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+  %op1 = add <16 x i8> %pn, splat (i8 2)
+  %op2 = add <16 x i8> %pn2, splat (i8 3)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add <16 x i8> %op2, %op1
+  ret <16 x i8> %rdx
+}
+
+; Reassociate scalable vector operands (<vscale x 16 x i8> accumulators updated with splat constants).
+define <vscale x 16 x i8> @add_nxv16i8(i32 %n) {
+; CHECK-LABEL: define <vscale x 16 x i8> @add_nxv16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi <vscale x 16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add <vscale x 16 x i8> [[PN]], splat (i8 2)
+; CHECK-NEXT:    [[OP2]] = add <vscale x 16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add <vscale x 16 x i8> [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi <vscale x 16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ]
+  %pn2 = phi <vscale x 16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+  %op1 = add <vscale x 16 x i8> %pn, splat (i8 2)
+  %op2 = add <vscale x 16 x i8> %pn2, splat (i8 3)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add <vscale x 16 x i8> %op2, %op1
+  ret <vscale x 16 x i8> %rdx
+}
+
+; Check other opcodes.
+
+; Reassociate mul (the expected output shows the multiply by 4 in %op2 rewritten as a shl by 2).
+define i8 @mul_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = mul i8 [[PN]], 3
+; CHECK-NEXT:    [[OP2]] = shl i8 [[PN2]], 2
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+  %op1 = mul i8 %pn, 3
+  %op2 = mul i8 %pn2, 4
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate mul; don't expect any flags to be propagated (all three muls carry nuw nsw in the input).
+define i8 @mul_reassoc_no_nuw_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc_no_nuw_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = mul nuw nsw i8 [[PN]], 3
+; CHECK-NEXT:    [[OP2]] = shl nuw nsw i8 [[PN2]], 2
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = mul nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+  %op1 = mul nuw nsw i8 %pn, 3
+  %op2 = mul nuw nsw i8 %pn2, 4
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul nuw nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate and, although it should already be optimized separately (the expected output shows the result folded to the constant 3).
+define i8 @and_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @and_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 3
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 31, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 63, %entry ], [ %op2, %body ]
+  %op1 = and i8 %pn, 3
+  %op2 = and i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = and i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate or, although it should already be optimized separately (the expected output shows the result folded to the constant 7).
+define i8 @or_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @or_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 7
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = or i8 %pn, 3
+  %op2 = or i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = or i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate or and propagate disjoint if all ops have it (%op1, %op2 and %rdx are all `or disjoint`).
+; Note: This can't currently be seen, as the results get folded to a constant (ret i8 7).
+define i8 @or_disjoint(i32 %n) {
+; CHECK-LABEL: define i8 @or_disjoint(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 7
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = or disjoint i8 %pn, 3
+  %op2 = or disjoint i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = or disjoint i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate xor: two interleaved i8 xor recurrences (%pn, %pn2) combined by %rdx in the exit block.
+define i8 @xor_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @xor_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = xor i8 [[PN]], 3
+; CHECK-NEXT:    [[OP2]] = xor i8 [[PN2]], 7
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = xor i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = xor i8 %pn, 3
+  %op2 = xor i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = xor i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate fadd if reassoc and nsz are present on all instructions (%op1, %op2 and %rdx are all `fadd reassoc nsz`).
+define float @fadd_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fadd reassoc nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT:    [[OP2]] = fadd reassoc nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fadd reassoc nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd reassoc nsz float %pn, 2.0
+  %op2 = fadd reassoc nsz float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd reassoc nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Reassociate fmul if reassoc and nsz are present on all instructions (%op1, %op2 and %rdx are all `fmul reassoc nsz`).
+define float @fmul_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fmul_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fmul reassoc nsz float [[PN]], 3.000000e+00
+; CHECK-NEXT:    [[OP2]] = fmul reassoc nsz float [[PN2]], 4.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fmul reassoc nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 1.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 2.0, %entry ], [ %op2, %body ]
+  %op1 = fmul reassoc nsz float %pn, 3.0
+  %op2 = fmul reassoc nsz float %pn2, 4.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fmul reassoc nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Don't reassociate without `reassoc` (the three fadds only carry nsz).
+define float @fadd_no_reassoc(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fadd nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT:    [[OP2]] = fadd nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fadd nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd nsz float %pn, 2.0
+  %op2 = fadd nsz float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Don't reassociate without `nsz` (the three fadds only carry reassoc).
+define float @fadd_no_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fadd reassoc float [[PN]], 2.000000e+00
+; CHECK-NEXT:    [[OP2]] = fadd reassoc float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fadd reassoc float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd reassoc float %pn, 2.0
+  %op2 = fadd reassoc float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd reassoc float %op2, %op1
+  ret float %rdx
+}
+
+; Check commuted operands.
+
+; Reassociate, even if op1 has commuted operands (%op1 is written as `add i8 2, %pn`, constant first).
+define i8 @add_op1_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 2, %pn
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if op2 has commuted operands (%op2 is written as `add i8 3, %pn2`, constant first).
+define i8 @add_op2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 3, %pn2
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if rdx has commuted operands (%rdx is `add i8 %op1, %op2` rather than `%op2, %op1`).
+define i8 @add_rdx_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op1, %op2
+  ret i8 %rdx
+}
+
+; Reassociate, even if pn has commuted incoming values (%pn lists the %body incoming value before the %entry one).
+define i8 @add_pn_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ %op1, %body ], [ 0, %entry ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if pn2 has commuted incoming values (%pn2 lists %body before %entry; note %rdx is also written as `add i8 %op1, %op2` here).
+define i8 @add_pn2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ %op2, %body ], [ 1, %entry ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op1, %op2
+  ret i8 %rdx
+}
+
+; Check the instructions have the same opcodes.
+
+; Don't reassociate if the first op doesn't match the rest.
+define i8 @no_mul_add_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_mul_add_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = shl i8 [[PN]], 1
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = mul i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the second op doesn't match the rest.
+define i8 @no_add_mul_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_mul_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = mul i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = mul i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the third op doesn't match the rest.
+define i8 @no_add_add_mul(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_add_mul(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check the number of uses of pn, pn2, op1 and op2.
+
+; Don't reassociate if pn has more uses.
+define i8 @no_add_pn_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[PN]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %pn)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if pn2 has more uses.
+define i8 @no_add_pn2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[PN2]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %pn2)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op1 has more uses.
+define i8 @no_add_op1_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[OP1]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %op1)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op2 has more uses.
+define i8 @no_add_op2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[OP2]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %op2)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check that init1, init2, c1 and c2 are constants.
+; Note: It should be possible to support non-constant operands; we just don't
+; do so yet.
+
+; Don't reassociate if the initial value of pn isn't constant.
+define i8 @no_add_init1(i8 %init1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init1(
+; CHECK-SAME: i8 [[INIT1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ [[INIT1]], %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ %init1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the initial value of pn2 isn't constant.
+define i8 @no_add_init2(i8 %init2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init2(
+; CHECK-SAME: i8 [[INIT2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ [[INIT2]], %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ %init2, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op1 isn't constant.
+define i8 @no_add_c1(i8 %c1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c1(
+; CHECK-SAME: i8 [[C1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], [[C1]]
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, %c1
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op2 isn't constant.
+define i8 @no_add_c2(i8 %c2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c2(
+; CHECK-SAME: i8 [[C2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], [[C2]]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, %c2
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check the incoming values of pn and pn2.
+
+; Don't reassociate if op1 doesn't recurse to pn.
+define i8 @no_add_op1_to_pn(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_to_pn(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[OP1:%.*]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op2, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op2 doesn't recurse to pn2.
+define i8 @no_add_op2_to_pn2(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_to_pn2(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[OP2:%.*]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the phis have more than two incoming values.
+define i8 @no_add_phis_more_incoming_values(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_phis_more_incoming_values(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[C]], label %[[BODY:.*]], label %[[OTHER:.*]]
+; CHECK:       [[OTHER]]:
+; CHECK-NEXT:    br label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ 0, %[[OTHER]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br i1 %c, label %body, label %other
+
+other:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ 1, %other ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ 0, %other ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ 1, %other ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the ops span different blocks.
+define i8 @no_add_op_multi_block(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op_multi_block(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OTHER:.*]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[OTHER]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[OTHER]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    br label %[[OTHER]]
+; CHECK:       [[OTHER]]:
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %other ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %other ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %other ]
+  %op1 = add i8 %pn, 2
+  br label %other
+
+other:
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+declare void @use(i8)

>From 61ef013829eaa9dc84e6f3c3323c2478ae22f723 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 6 Jun 2025 10:14:55 -0700
Subject: [PATCH 2/3] [InstCombine] Combine interleaved PHI reduction chains.

Combine sequences such as:
```llvm
  %pn1 = phi [init1, %BB1], [%op1, %BB2]
  %pn2 = phi [init2, %BB1], [%op2, %BB2]
  %op1 = binop %pn1, constant1
  %op2 = binop %pn2, constant2
  %rdx = binop %op1, %op2
```
Into:
```llvm
  %phi_combined = phi [init_combined, %BB1], [%rdx_combined, %BB2]
  %rdx_combined = binop %phi_combined, constant_combined
```

This allows us to simplify interleaved reductions, for example as
generated by the loop vectorizer.

A motivating example for this is the loop below:
```c
float foo() {
  float q = 1.f;
  for (int i = 0; i < 1000; ++i)
    q *= .99f;
  return q;
}
```
This is currently lowered to an explicit loop, for example (on AArch64):
```gas
.LBB0_1:
  fmul    v0.4s, v0.4s, v1.4s
  fmul    v2.4s, v2.4s, v1.4s
  fmul    v3.4s, v3.4s, v1.4s
  fmul    v4.4s, v4.4s, v1.4s
  subs    w8, w8, #32
  b.ne    .LBB0_1
```
With this patch, the loop is folded away and the function returns a constant:
```gas
foo:
  mov     w8, #5028
  movk    w8, #14389, lsl #16
  fmov    s0, w8
  ret
```

Currently, we require init1, init2, constant1 and constant2 to be
constants that we can fold, but this may be relaxed in the future.
---
 .../InstCombine/InstCombineInternal.h         |   3 +
 .../Transforms/InstCombine/InstCombinePHI.cpp | 151 ++++++++++++++++++
 .../InstCombine/phi-reduction-chain.ll        | 133 +++++----------
 3 files changed, 192 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8c9de862fe8f2..4f42ed4eeebc9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -656,6 +656,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
   Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
 
+  /// Try to fold interleaved PHI reductions to a single PHI.
+  Instruction *foldPHIReduction(PHINode &PN);
+
   /// If the phi is within a phi web, which is formed by the def-use chain
   /// of phis and all the phis in the web are only used in the other phis.
   /// In this case, these phis are dead and we will remove all of them.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6477141ab095f..6e2927234f076 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -36,6 +36,7 @@ STATISTIC(NumPHIsOfInsertValues,
 STATISTIC(NumPHIsOfExtractValues,
           "Number of phi-of-extractvalue turned into extractvalue-of-phi");
 STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+STATISTIC(NumPHIsInterleaved, "Number of interleaved PHI's combined");
 
 /// The PHI arguments will be folded into a single operation with a PHI node
 /// as input. The debug location of the single operation will be the merged
@@ -996,6 +997,152 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
   return NewCI;
 }
 
+/// Try to fold reduction ops interleaved through two PHIs to a single PHI.
+///
+/// For example, combine:
+///   %phi1 = phi [init1, %BB1], [%op1, %BB2]
+///   %phi2 = phi [init2, %BB1], [%op2, %BB2]
+///   %op1 = binop %phi1, constant1
+///   %op2 = binop %phi2, constant2
+///   %rdx = binop %op1, %op2
+/// =>
+///   %phi_combined = phi [init_combined, %BB1], [%rdx_combined, %BB2]
+///   %rdx_combined = binop %phi_combined, constant_combined
+///
+/// For now, we require init1, init2, constant1 and constant2 to be constants.
+Instruction *InstCombinerImpl::foldPHIReduction(PHINode &PN) {
+  BinaryOperator *BO1;
+  Value *Start1;
+  Value *Step1;
+
+  // Find the first recurrence.
+  if (!PN.hasOneUse() || !matchSimpleRecurrence(&PN, BO1, Start1, Step1))
+    return nullptr;
+
+  // Ensure BO1 has two uses (PN and the reduction op) and can be reassociated.
+  if (!BO1->hasNUses(2) || !BO1->isAssociative())
+    return nullptr;
+
+  // Convert Start1 and Step1 to constants.
+  auto *Init1 = dyn_cast<Constant>(Start1);
+  auto *C1 = dyn_cast<Constant>(Step1);
+  if (!Init1 || !C1)
+    return nullptr;
+
+  // Find the reduction operation.
+  auto Opc = BO1->getOpcode();
+  BinaryOperator *Rdx = nullptr;
+  for (User *U : BO1->users())
+    if (U != &PN) {
+      Rdx = dyn_cast<BinaryOperator>(U);
+      break;
+    }
+  if (!Rdx || Rdx->getOpcode() != Opc || !Rdx->isAssociative())
+    return nullptr;
+
+  // Find the interleaved binop.
+  assert((Rdx->getOperand(0) == BO1 || Rdx->getOperand(1) == BO1) &&
+         "Unexpected operand!");
+  auto *BO2 =
+      dyn_cast<BinaryOperator>(Rdx->getOperand(Rdx->getOperand(0) == BO1));
+  if (!BO2 || !BO2->hasNUses(2) || !BO2->isAssociative() ||
+      BO2->getOpcode() != Opc || BO2->getParent() != BO1->getParent())
+    return nullptr;
+
+  // Find the interleaved PHI and recurrence constants.
+  PHINode *PN2;
+  Value *Start2;
+  Value *Step2;
+  if (!matchSimpleRecurrence(BO2, PN2, Start2, Step2) || !PN2->hasOneUse() ||
+      PN2->getParent() != PN.getParent())
+    return nullptr;
+
+  assert(PN2->getNumIncomingValues() == PN.getNumIncomingValues() &&
+         "Expected PHIs with the same number of incoming values!");
+
+  // Convert Start2 and Step2 to constants.
+  auto *Init2 = dyn_cast<Constant>(Start2);
+  auto *C2 = dyn_cast<Constant>(Step2);
+  if (!Init2 || !C2)
+    return nullptr;
+
+  assert(BO1->isCommutative() && BO2->isCommutative() && Rdx->isCommutative() &&
+         "Expected commutative instructions!");
+
+  // If we've got this far, we can transform:
+  //   pn = phi [init1; op1]
+  //   pn2 = phi [init2; op2]
+  //   op1 = binop (pn, c1)
+  //   op2 = binop (pn2, c2)
+  //   rdx = binop (op1, op2)
+  // Into:
+  //   pn = phi [binop (init1, init2); rdx]
+  //   rdx = binop (pn, binop (c1, c2))
+
+  // Attempt to fold the constants.
+  auto *Init = llvm::ConstantFoldBinaryInstruction(Opc, Init1, Init2);
+  auto *C = llvm::ConstantFoldBinaryInstruction(Opc, C1, C2);
+  if (!Init || !C)
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "  Combining " << PN << "\n            " << *BO1
+                    << "\n       with " << *PN2 << "\n            " << *BO2
+                    << '\n');
+  ++NumPHIsInterleaved;
+
+  // Create the new PHI.
+  auto *NewPN = PHINode::Create(PN.getType(), PN.getNumIncomingValues());
+
+  // Create the new binary op.
+  auto *NewOp = BinaryOperator::Create(Opc, NewPN, C);
+  if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
+    // Intersect FMF flags for FADD and FMUL.
+    FastMathFlags Intersect = BO1->getFastMathFlags() &
+                              BO2->getFastMathFlags() & Rdx->getFastMathFlags();
+    NewOp->setFastMathFlags(Intersect);
+  } else {
+    OverflowTracking Flags;
+    Flags.AllKnownNonNegative = false;
+    Flags.AllKnownNonZero = false;
+    Flags.mergeFlags(*BO1);
+    Flags.mergeFlags(*BO2);
+    Flags.mergeFlags(*Rdx);
+    Flags.applyFlags(*NewOp);
+  }
+  InsertNewInstWith(NewOp, BO1->getIterator());
+  replaceInstUsesWith(*Rdx, NewOp);
+
+  for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+    auto *V = PN.getIncomingValue(I);
+    auto *BB = PN.getIncomingBlock(I);
+    if (V == Init1) {
+      assert(((PN2->getIncomingValue(0) == Init2 &&
+               PN2->getIncomingBlock(0) == BB) ||
+              (PN2->getIncomingValue(1) == Init2 &&
+               PN2->getIncomingBlock(1) == BB)) &&
+             "Invalid incoming block!");
+      NewPN->addIncoming(Init, BB);
+    } else if (V == BO1) {
+      assert(((PN2->getIncomingValue(0) == BO2 &&
+               PN2->getIncomingBlock(0) == BB) ||
+              (PN2->getIncomingValue(1) == BO2 &&
+               PN2->getIncomingBlock(1) == BB)) &&
+             "Invalid incoming block!");
+      NewPN->addIncoming(NewOp, BB);
+    } else
+      llvm_unreachable("Unexpected incoming value!");
+  }
+
+  // Remove dead instructions. Replacing BO1/BO2 with themselves makes
+  // replaceInstUsesWith substitute poison, dropping their remaining PHI uses.
+  eraseInstFromFunction(*Rdx);
+  eraseInstFromFunction(*replaceInstUsesWith(*BO1, BO1));
+  eraseInstFromFunction(*replaceInstUsesWith(*BO2, BO2));
+  eraseInstFromFunction(*PN2);
+
+  return NewPN;
+}
+
 /// Return true if this phi node is always equal to NonPhiInVal.
 /// This happens with mutually cyclic phi nodes like:
 ///   z = some value; x = phi (y, z); y = phi (x, z)
@@ -1455,6 +1602,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     if (Instruction *Result = foldPHIArgOpIntoPHI(PN))
       return Result;
 
+  // Try to fold interleaved PHI reductions to a single PHI.
+  if (Instruction *Result = foldPHIReduction(PN))
+    return Result;
+
   // If the incoming values are pointer casts of the same original value,
   // replace the phi with a single cast iff we can insert a non-PHI instruction.
   if (PN.getType()->isPointerTy() &&
diff --git a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
index 2b26fb5c66382..26d3cd56f158f 100644
--- a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
+++ b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
@@ -9,15 +9,12 @@ define i8 @add_reassoc(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -46,15 +43,12 @@ define i8 @add_nuw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add nuw i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -83,15 +77,12 @@ define i8 @add_op1_no_nuw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -120,15 +111,12 @@ define i8 @add_op2_no_nuw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -157,15 +145,12 @@ define i8 @add_rdx_no_nuw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -194,15 +179,12 @@ define i8 @add_no_nsw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add nuw nsw i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -231,15 +213,12 @@ define i8 @add_nuw_nsw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add nuw nsw i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add nuw nsw i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -268,15 +247,12 @@ define <16 x i8> @add_v16i8(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi <16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add <16 x i8> [[PN]], splat (i8 2)
-; CHECK-NEXT:    [[OP2]] = add <16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT:    [[PN:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add <16 x i8> [[PN]], splat (i8 5)
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add <16 x i8> [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret <16 x i8> [[RDX]]
 ;
 entry:
@@ -305,15 +281,12 @@ define <vscale x 16 x i8> @add_nxv16i8(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi <vscale x 16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add <vscale x 16 x i8> [[PN]], splat (i8 2)
-; CHECK-NEXT:    [[OP2]] = add <vscale x 16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT:    [[PN:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add <vscale x 16 x i8> [[PN]], splat (i8 5)
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add <vscale x 16 x i8> [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[RDX]]
 ;
 entry:
@@ -344,15 +317,12 @@ define i8 @mul_reassoc(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = mul i8 [[PN]], 3
-; CHECK-NEXT:    [[OP2]] = shl i8 [[PN2]], 2
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = mul i8 [[PN]], 12
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -381,15 +351,12 @@ define i8 @mul_reassoc_no_nuw_no_nsw(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = mul nuw nsw i8 [[PN]], 3
-; CHECK-NEXT:    [[OP2]] = shl nuw nsw i8 [[PN2]], 2
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = mul i8 [[PN]], 12
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = mul nuw nsw i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -515,15 +482,12 @@ define i8 @xor_reassoc(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = xor i8 [[PN]], 3
-; CHECK-NEXT:    [[OP2]] = xor i8 [[PN2]], 7
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = xor i8 [[PN]], 4
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = xor i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -552,15 +516,12 @@ define float @fadd_reassoc_nsz(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = fadd reassoc nsz float [[PN]], 2.000000e+00
-; CHECK-NEXT:    [[OP2]] = fadd reassoc nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = fadd reassoc nsz float [[PN]], 5.000000e+00
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = fadd reassoc nsz float [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret float [[RDX]]
 ;
 entry:
@@ -589,15 +550,12 @@ define float @fmul_reassoc_nsz(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = fmul reassoc nsz float [[PN]], 3.000000e+00
-; CHECK-NEXT:    [[OP2]] = fmul reassoc nsz float [[PN2]], 4.000000e+00
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = fmul reassoc nsz float [[PN]], 1.200000e+01
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = fmul reassoc nsz float [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret float [[RDX]]
 ;
 entry:
@@ -702,15 +660,12 @@ define i8 @add_op1_commuted(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -739,15 +694,12 @@ define i8 @add_op2_commuted(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -776,15 +728,12 @@ define i8 @add_rdx_commuted(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -813,15 +762,12 @@ define i8 @add_pn_commuted(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:
@@ -850,15 +796,12 @@ define i8 @add_pn2_commuted(i32 %n) {
 ; CHECK-NEXT:    br label %[[BODY:.*]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[PN]], 5
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
 ; CHECK-NEXT:    ret i8 [[RDX]]
 ;
 entry:

>From f851c36f74caa20bf9135b8498810e8c4a6ea571 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Mon, 16 Jun 2025 02:54:37 -0700
Subject: [PATCH 3/3] Explicitly replace BO1/2 with poison.

---
 llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6e2927234f076..34bf47a2428a5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1136,8 +1136,10 @@ Instruction *InstCombinerImpl::foldPHIReduction(PHINode &PN) {
   // Remove dead instructions. BO1/2 are replaced with poison to clean up their
   // uses.
   eraseInstFromFunction(*Rdx);
-  eraseInstFromFunction(*replaceInstUsesWith(*BO1, BO1));
-  eraseInstFromFunction(*replaceInstUsesWith(*BO2, BO2));
+  eraseInstFromFunction(
+      *replaceInstUsesWith(*BO1, PoisonValue::get(BO1->getType())));
+  eraseInstFromFunction(
+      *replaceInstUsesWith(*BO2, PoisonValue::get(BO2->getType())));
   eraseInstFromFunction(*PN2);
 
   return NewPN;



More information about the llvm-commits mailing list