[llvm] [InstCombine] Combine interleaved PHI reduction chains. (PR #143878)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 10:02:10 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/143878
>From a8103f0825c195599ae79fbb2da9c909fe702769 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 6 Jun 2025 10:30:49 -0700
Subject: [PATCH 1/3] Precommit tests.
---
.../InstCombine/phi-reduction-chain.ll | 1465 +++++++++++++++++
1 file changed, 1465 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
diff --git a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
new file mode 100644
index 0000000000000..2b26fb5c66382
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
@@ -0,0 +1,1465 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Reassociate add: two interleaved i8 add chains (%pn/%op1 and %pn2/%op2) are summed by %rdx in the exit block.
+define i8 @add_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @add_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ] ; first chain: starts at 0, steps by 2
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ] ; second chain: starts at 1, steps by 3
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1 ; final reduction of the two chains
+ ret i8 %rdx
+}
+
+; Reassociate add, and maintain nuw if all ops (%op1, %op2 and %rdx) have it.
+define i8 @add_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add nuw i8 %pn, 2 ; nuw on the first chain
+ %op2 = add nuw i8 %pn2, 3 ; nuw on the second chain
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add nuw i8 %op2, %op1 ; nuw on the final reduction
+ ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op1 doesn't have it (%op2 and %rdx still carry nuw).
+define i8 @add_op1_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2 ; the only add without nuw
+ %op2 = add nuw i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add nuw i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op2 doesn't have it (%op1 and %rdx still carry nuw).
+define i8 @add_op2_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add nuw i8 %pn, 2
+ %op2 = add i8 %pn2, 3 ; the only add without nuw
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add nuw i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if rdx doesn't have it (%op1 and %op2 still carry nuw).
+define i8 @add_rdx_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add nuw i8 %pn, 2
+ %op2 = add nuw i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1 ; the only add without nuw
+ ret i8 %rdx
+}
+
+; Reassociate add, drop nsw even if all ops have it. The inputs carry only nsw; the assertions show nuw being added to all three adds.
+define i8 @add_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add nuw nsw i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add nsw i8 %pn, 2 ; nsw only (no nuw) in the input
+ %op2 = add nsw i8 %pn2, 3 ; nsw only (no nuw) in the input
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add nsw i8 %op2, %op1 ; nsw only (no nuw) in the input
+ ret i8 %rdx
+}
+
+; Reassociate add, keep nuw/nsw if all ops (%op1, %op2 and %rdx) have both flags.
+define i8 @add_nuw_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add nuw nsw i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add nuw nsw i8 %pn, 2 ; both flags on the first chain
+ %op2 = add nuw nsw i8 %pn2, 3 ; both flags on the second chain
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add nuw nsw i8 %op2, %op1 ; both flags on the final reduction
+ ret i8 %rdx
+}
+
+; Reassociate fixed-length vector operands (<16 x i8> chains with splat constants).
+define <16 x i8> @add_v16i8(i32 %n) {
+; CHECK-LABEL: define <16 x i8> @add_v16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi <16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add <16 x i8> [[PN]], splat (i8 2)
+; CHECK-NEXT: [[OP2]] = add <16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add <16 x i8> [[OP2]], [[OP1]]
+; CHECK-NEXT: ret <16 x i8> [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi <16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ] ; zero splat is shown as zeroinitializer in the assertions
+ %pn2 = phi <16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+ %op1 = add <16 x i8> %pn, splat (i8 2)
+ %op2 = add <16 x i8> %pn2, splat (i8 3)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add <16 x i8> %op2, %op1
+ ret <16 x i8> %rdx
+}
+
+; Reassociate scalable vector operands (<vscale x 16 x i8> chains with splat constants).
+define <vscale x 16 x i8> @add_nxv16i8(i32 %n) {
+; CHECK-LABEL: define <vscale x 16 x i8> @add_nxv16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi <vscale x 16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add <vscale x 16 x i8> [[PN]], splat (i8 2)
+; CHECK-NEXT: [[OP2]] = add <vscale x 16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add <vscale x 16 x i8> [[OP2]], [[OP1]]
+; CHECK-NEXT: ret <vscale x 16 x i8> [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi <vscale x 16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ] ; zero splat is shown as zeroinitializer in the assertions
+ %pn2 = phi <vscale x 16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+ %op1 = add <vscale x 16 x i8> %pn, splat (i8 2)
+ %op2 = add <vscale x 16 x i8> %pn2, splat (i8 3)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add <vscale x 16 x i8> %op2, %op1
+ ret <vscale x 16 x i8> %rdx
+}
+
+; Check other opcodes.
+
+; Reassociate mul: two interleaved i8 mul chains (%pn/%op1 and %pn2/%op2) are multiplied by %rdx in the exit block.
+define i8 @mul_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = mul i8 [[PN]], 3
+; CHECK-NEXT: [[OP2]] = shl i8 [[PN2]], 2
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+ %op1 = mul i8 %pn, 3
+ %op2 = mul i8 %pn2, 4 ; multiply by 4 is shown as a shift left by 2 in the assertions
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = mul i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate mul, don't expect any flags to be propagated, even though %op1, %op2 and %rdx all carry nuw nsw.
+define i8 @mul_reassoc_no_nuw_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc_no_nuw_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = mul nuw nsw i8 [[PN]], 3
+; CHECK-NEXT: [[OP2]] = shl nuw nsw i8 [[PN2]], 2
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = mul nuw nsw i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+ %op1 = mul nuw nsw i8 %pn, 3
+ %op2 = mul nuw nsw i8 %pn2, 4 ; shown as a shift left by 2 with the same flags in the assertions
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = mul nuw nsw i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate and, although it should already be optimized separately (the assertions show the result folded to the constant 3).
+define i8 @and_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @and_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i8 3
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 31, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 63, %entry ], [ %op2, %body ]
+ %op1 = and i8 %pn, 3 ; first chain masks with 3
+ %op2 = and i8 %pn2, 7 ; second chain masks with 7
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = and i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate or, although it should already be optimized separately (the assertions show the result folded to the constant 7).
+define i8 @or_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @or_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i8 7
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = or i8 %pn, 3 ; first chain sets the bits of 3
+ %op2 = or i8 %pn2, 7 ; second chain sets the bits of 7
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = or i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate or and propagate disjoint if all ops (%op1, %op2 and %rdx) have it.
+; Note: The flag can't currently be observed because the whole reduction is optimized away to `ret i8 7`.
+define i8 @or_disjoint(i32 %n) {
+; CHECK-LABEL: define i8 @or_disjoint(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i8 7
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = or disjoint i8 %pn, 3
+ %op2 = or disjoint i8 %pn2, 7
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = or disjoint i8 %op2, %op1 ; disjoint on the final reduction as well
+ ret i8 %rdx
+}
+
+; Reassociate xor: two interleaved i8 xor chains (%pn/%op1 and %pn2/%op2) are xor'ed together by %rdx in the exit block.
+define i8 @xor_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @xor_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = xor i8 [[PN]], 3
+; CHECK-NEXT: [[OP2]] = xor i8 [[PN2]], 7
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = xor i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ] ; first chain: starts at 0, toggles the bits of 3
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ] ; second chain: starts at 1, toggles the bits of 7
+ %op1 = xor i8 %pn, 3
+ %op2 = xor i8 %pn2, 7
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = xor i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate fadd if reassoc and nsz are present on all instructions (%op1, %op2 and %rdx).
+define float @fadd_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = fadd reassoc nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT: [[OP2]] = fadd reassoc nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = fadd reassoc nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT: ret float [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+ %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+ %op1 = fadd reassoc nsz float %pn, 2.0 ; both fast-math flags on the first chain
+ %op2 = fadd reassoc nsz float %pn2, 3.0 ; both fast-math flags on the second chain
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = fadd reassoc nsz float %op2, %op1 ; both fast-math flags on the final reduction
+ ret float %rdx
+}
+
+; Reassociate fmul if reassoc and nsz are present on all instructions (%op1, %op2 and %rdx).
+define float @fmul_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fmul_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = fmul reassoc nsz float [[PN]], 3.000000e+00
+; CHECK-NEXT: [[OP2]] = fmul reassoc nsz float [[PN2]], 4.000000e+00
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = fmul reassoc nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT: ret float [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi float [ 1.0, %entry ], [ %op1, %body ]
+ %pn2 = phi float [ 2.0, %entry ], [ %op2, %body ]
+ %op1 = fmul reassoc nsz float %pn, 3.0 ; both fast-math flags on the first chain
+ %op2 = fmul reassoc nsz float %pn2, 4.0 ; both fast-math flags on the second chain
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = fmul reassoc nsz float %op2, %op1 ; both fast-math flags on the final reduction
+ ret float %rdx
+}
+
+; Don't reassociate without `reassoc`: every fadd here carries only nsz.
+define float @fadd_no_reassoc(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = fadd nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT: [[OP2]] = fadd nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = fadd nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT: ret float [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+ %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+ %op1 = fadd nsz float %pn, 2.0 ; nsz only, reassoc missing
+ %op2 = fadd nsz float %pn2, 3.0 ; nsz only, reassoc missing
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = fadd nsz float %op2, %op1 ; nsz only, reassoc missing
+ ret float %rdx
+}
+
+; Don't reassociate without `nsz`: every fadd here carries only reassoc.
+define float @fadd_no_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = fadd reassoc float [[PN]], 2.000000e+00
+; CHECK-NEXT: [[OP2]] = fadd reassoc float [[PN2]], 3.000000e+00
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = fadd reassoc float [[OP2]], [[OP1]]
+; CHECK-NEXT: ret float [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+ %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+ %op1 = fadd reassoc float %pn, 2.0 ; reassoc only, nsz missing
+ %op2 = fadd reassoc float %pn2, 3.0 ; reassoc only, nsz missing
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = fadd reassoc float %op2, %op1 ; reassoc only, nsz missing
+ ret float %rdx
+}
+
+; Check commuted operands.
+
+; Reassociate, even if op1 has commuted operands (constant on the left-hand side).
+define i8 @add_op1_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 2, %pn ; constant first; the assertions show the phi moved back to the left-hand side
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate, even if op2 has commuted operands (constant on the left-hand side).
+define i8 @add_op2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 3, %pn2 ; constant first; the assertions show the phi moved back to the left-hand side
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate, even if rdx has commuted operands (%op1 first, then %op2).
+define i8 @add_rdx_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op1, %op2 ; operand order swapped relative to add_reassoc
+ ret i8 %rdx
+}
+
+; Reassociate, even if pn has commuted incoming values (back-edge value listed before the entry value).
+define i8 @add_pn_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ %op1, %body ], [ 0, %entry ] ; back-edge first; the assertions show the entry value listed first again
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Reassociate, even if pn2 has commuted incoming values (back-edge value listed before the entry value).
+define i8 @add_pn2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ %op2, %body ], [ 1, %entry ] ; back-edge first; the assertions show the entry value listed first again
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op1, %op2 ; NOTE(review): rdx operands are also swapped here, unlike add_pn_commuted - confirm this is intended
+ ret i8 %rdx
+}
+
+; Check that nothing is reassociated unless all the instructions have the same opcode.
+
+; Don't reassociate if the first op doesn't match the rest (%op1 is a mul, while %op2 and %rdx are adds).
+define i8 @no_mul_add_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_mul_add_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = shl i8 [[PN]], 1
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = mul i8 %pn, 2 ; mismatching opcode; shown as a shift left by 1 in the assertions
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the second op doesn't match the rest: op2 is a mul, while op1 and rdx are adds.
+define i8 @no_add_mul_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_mul_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = mul i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = mul i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the third op doesn't match the rest: rdx is a mul, while op1 and op2 are adds.
+define i8 @no_add_add_mul(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_add_mul(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = mul i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Check the number of uses of pn, pn2, op1 and op2.
+
+; Don't reassociate if pn has more uses: besides op1, pn is also passed to @use.
+define i8 @no_add_pn_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: tail call void @use(i8 [[PN]])
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ tail call void @use(i8 %pn)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if pn2 has more uses: besides op2, pn2 is also passed to @use.
+define i8 @no_add_pn2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: tail call void @use(i8 [[PN2]])
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ tail call void @use(i8 %pn2)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if op1 has more uses: besides pn and rdx, op1 is also passed to @use.
+define i8 @no_add_op1_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: tail call void @use(i8 [[OP1]])
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ tail call void @use(i8 %op1)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if op2 has more uses: besides pn2 and rdx, op2 is also passed to @use.
+define i8 @no_add_op2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: tail call void @use(i8 [[OP2]])
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ tail call void @use(i8 %op2)
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Check that init1, init2, c1 and c2 are constants.
+; Note: It should be possible to support non-constant operands, we just don't
+; do so yet.
+
+; Don't reassociate if the initial value of pn isn't constant: here it is the argument %init1.
+define i8 @no_add_init1(i8 %init1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init1(
+; CHECK-SAME: i8 [[INIT1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ [[INIT1]], %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ %init1, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the initial value of pn2 isn't constant: here it is the argument %init2.
+define i8 @no_add_init2(i8 %init2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init2(
+; CHECK-SAME: i8 [[INIT2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ [[INIT2]], %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ %init2, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op1 isn't constant: here op1 adds the argument %c1 to pn.
+define i8 @no_add_c1(i8 %c1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c1(
+; CHECK-SAME: i8 [[C1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], [[C1]]
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, %c1
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op2 isn't constant: here op2 adds the argument %c2 to pn2.
+define i8 @no_add_c2(i8 %c2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c2(
+; CHECK-SAME: i8 [[C2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], [[C2]]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, %c2
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Check the incoming values of pn and pn2.
+
+; Don't reassociate if op1 doesn't recurse to pn: here pn's value from %body is op2, so op1 is only used by rdx. The i1 %c argument is unused.
+define i8 @no_add_op1_to_pn(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_to_pn(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2]], %[[BODY]] ]
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[OP1:%.*]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op2, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if op2 doesn't recurse to pn2: here pn2's value from %body is op1, so op2 is only used by rdx. The i1 %c argument is unused.
+define i8 @no_add_op2_to_pn2(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_to_pn2(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[OP2:%.*]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op1, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the phis have more than two incoming values: here each phi has three (from %entry, %other and %body).
+define i8 @no_add_phis_more_incoming_values(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_phis_more_incoming_values(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[BODY:.*]], label %[[OTHER:.*]]
+; CHECK: [[OTHER]]:
+; CHECK-NEXT: br label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ 0, %[[OTHER]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br i1 %c, label %body, label %other
+
+other:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ 1, %other ], [ %i.next, %body ]
+ %pn = phi i8 [ 0, %entry ], [ 0, %other ], [ %op1, %body ]
+ %pn2 = phi i8 [ 1, %entry ], [ 1, %other ], [ %op2, %body ]
+ %op1 = add i8 %pn, 2
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+; Don't reassociate if the ops span different blocks: here op1 is in %body and op2 is in %other.
+define i8 @no_add_op_multi_block(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op_multi_block(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OTHER:.*]] ]
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[OTHER]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[OTHER]] ]
+; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT: br label %[[OTHER]]
+; CHECK: [[OTHER]]:
+; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT: ret i8 [[RDX]]
+;
+entry:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %other ]
+ %pn = phi i8 [ 0, %entry ], [ %op1, %other ]
+ %pn2 = phi i8 [ 1, %entry ], [ %op2, %other ]
+ %op1 = add i8 %pn, 2
+ br label %other
+
+other:
+ %op2 = add i8 %pn2, 3
+ %i.next = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.next, %n
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ %rdx = add i8 %op2, %op1
+ ret i8 %rdx
+}
+
+declare void @use(i8)
>From 58b04aa382edc752e3474ede1cf621846e84bfef Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Fri, 6 Jun 2025 10:14:55 -0700
Subject: [PATCH 2/3] [InstCombine] Combine interleaved PHI reduction chains.
Combine sequences such as:
```llvm
%pn1 = phi [init1, %BB1], [%op1, %BB2]
%pn2 = phi [init2, %BB1], [%op2, %BB2]
%op1 = binop %pn1, constant1
%op2 = binop %pn2, constant2
%rdx = binop %op1, %op2
```
Into:
```llvm
%phi_combined = phi [init_combined, %BB1], [%rdx_combined, %BB2]
%rdx_combined = binop %phi_combined, constant_combined
```
This allows us to simplify interleaved reductions, for example as
generated by the loop vectorizer.
A motivating example for this is the loop below:
```c
float foo() {
float q = 1.f;
for (int i = 0; i < 1000; ++i)
q *= .99f;
return q;
}
```
This currently gets lowered to an explicit loop such as (on AArch64):
```gas
.LBB0_1:
fmul v0.4s, v0.4s, v1.4s
fmul v2.4s, v2.4s, v1.4s
fmul v3.4s, v3.4s, v1.4s
fmul v4.4s, v4.4s, v1.4s
subs w8, w8, #32
b.ne .LBB0_1
```
But with this patch, it lowers trivially to:
```gas
foo:
mov w8, #5028
movk w8, #14389, lsl #16
fmov s0, w8
ret
```
Currently, we require init1, init2, constant1 and constant2 to be
constants that we can fold, but this may be relaxed in the future.
---
.../InstCombine/InstCombineInternal.h | 3 +
.../Transforms/InstCombine/InstCombinePHI.cpp | 164 ++++++++++++++++++
.../InstCombine/phi-reduction-chain.ll | 133 ++++----------
3 files changed, 205 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 334462d715f95..248092e21f109 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -652,6 +652,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
+ /// Try to fold interleaved PHI reductions to a single PHI.
+ Instruction *foldPHIReduction(PHINode &PN);
+
/// If the phi is within a phi web, which is formed by the def-use chain
/// of phis and all the phis in the web are only used in the other phis.
/// In this case, these phis are dead and we will remove all of them.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index a842a5edcb8a3..7fecb213cb0f6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -36,6 +36,7 @@ STATISTIC(NumPHIsOfInsertValues,
STATISTIC(NumPHIsOfExtractValues,
"Number of phi-of-extractvalue turned into extractvalue-of-phi");
STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+STATISTIC(NumPHIsInterleaved, "Number of interleaved PHI's combined");
/// The PHI arguments will be folded into a single operation with a PHI node
/// as input. The debug location of the single operation will be the merged
@@ -989,6 +990,165 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
return NewCI;
}
+/// Try to fold reduction ops interleaved through two PHIs to a single PHI.
+///
+/// For example, combine:
+/// %phi1 = phi [init1, %BB1], [%op1, %BB2]
+/// %phi2 = phi [init2, %BB1], [%op2, %BB2]
+/// %op1 = binop %phi1, constant1
+/// %op2 = binop %phi2, constant2
+/// %rdx = binop %op1, %op2
+/// =>
+/// %phi_combined = phi [init_combined, %BB1], [%rdx_combined, %BB2]
+/// %rdx_combined = binop %phi_combined, constant_combined
+/// For now, init1, init2, constant1 and constant2 must all be constants.
+/// Returns the combined PHI on success, or nullptr if nothing was folded.
+Instruction *InstCombinerImpl::foldPHIReduction(PHINode &PN) {
+ // For now, only handle PHIs with one use and exactly two incoming values.
+ if (!PN.hasOneUse() || PN.getNumIncomingValues() != 2)
+ return nullptr;
+
+ // Find the binop using PN; it must be associative and have exactly two uses.
+ auto *BO1 = dyn_cast<BinaryOperator>(PN.user_back());
+ if (!BO1 || !BO1->hasNUses(2) || !BO1->isAssociative())
+ return nullptr;
+
+ // Ensure BO1 is one of PN's incoming values, i.e. PN and BO1 form a cycle.
+ if (PN.getIncomingValue(0) != BO1 && PN.getIncomingValue(1) != BO1)
+ return nullptr;
+
+ // The initial value of PN is the incoming value that isn't BO1 (index 0 or 1).
+ auto *Init1 =
+ dyn_cast<Constant>(PN.getIncomingValue(PN.getIncomingValue(0) == BO1));
+ if (!Init1)
+ return nullptr;
+
+ // The constant operand of BO1 is whichever operand isn't PN.
+ assert((BO1->getOperand(0) == &PN || BO1->getOperand(1) == &PN) &&
+ "Unexpected operand!");
+ auto *C1 = dyn_cast<Constant>(BO1->getOperand(BO1->getOperand(0) == &PN));
+ if (!C1)
+ return nullptr;
+
+ // Find the reduction operation: the first user of BO1 that isn't PN.
+ auto Opc = BO1->getOpcode();
+ BinaryOperator *Rdx = nullptr;
+ for (User *U : BO1->users())
+ if (U != &PN) {
+ Rdx = dyn_cast<BinaryOperator>(U);
+ break;
+ }
+ if (!Rdx || Rdx->getOpcode() != Opc || !Rdx->isAssociative())
+ return nullptr;
+
+ // The interleaved binop is the operand of Rdx that isn't BO1.
+ assert((Rdx->getOperand(0) == BO1 || Rdx->getOperand(1) == BO1) &&
+ "Unexpected operand!");
+ auto *BO2 =
+ dyn_cast<BinaryOperator>(Rdx->getOperand(Rdx->getOperand(0) == BO1));
+ if (!BO2 || !BO2->hasNUses(2) || !BO2->isAssociative() ||
+ BO2->getOpcode() != Opc || BO2->getParent() != BO1->getParent())
+ return nullptr;
+
+ // Find the interleaved PHI and recurrence constant, in either operand order.
+ auto *PN2 = dyn_cast<PHINode>(BO2->getOperand(0));
+ auto *C2 = dyn_cast<Constant>(BO2->getOperand(1));
+ if (!PN2 && !C2) {
+ PN2 = dyn_cast<PHINode>(BO2->getOperand(1));
+ C2 = dyn_cast<Constant>(BO2->getOperand(0));
+ }
+ if (!PN2 || !C2 || !PN2->hasOneUse() || PN2->getParent() != PN.getParent())
+ return nullptr;
+ assert(PN2->getNumIncomingValues() == PN.getNumIncomingValues() &&
+ "Expected PHIs with the same number of incoming values!");
+
+ // Ensure BO2 is one of PN2's incoming values, mirroring the PN/BO1 check.
+ if (PN2->getIncomingValue(0) != BO2 && PN2->getIncomingValue(1) != BO2)
+ return nullptr;
+
+ // The initial value of PN2 is the incoming value that isn't BO2.
+ auto *Init2 = dyn_cast<Constant>(
+ PN2->getIncomingValue(PN2->getIncomingValue(0) == BO2));
+ if (!Init2)
+ return nullptr;
+
+ assert(BO1->isCommutative() && BO2->isCommutative() && Rdx->isCommutative() &&
+ "Expected commutative instructions!");
+
+ // If we've got this far, we can transform:
+ // pn = phi [init1; op1]
+ // pn2 = phi [init2; op2]
+ // op1 = binop (pn, c1)
+ // op2 = binop (pn2, c2)
+ // rdx = binop (op1, op2)
+ // Into:
+ // pn = phi [binop (init1, init2); rdx]
+ // rdx = binop (pn, binop (c1, c2))
+
+ // Attempt to fold the constants; bail out if either fold fails.
+ auto *Init = llvm::ConstantFoldBinaryInstruction(Opc, Init1, Init2);
+ auto *C = llvm::ConstantFoldBinaryInstruction(Opc, C1, C2);
+ if (!Init || !C)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << " Combining " << PN << "\n " << *BO1
+ << "\n with " << *PN2 << "\n " << *BO2
+ << '\n');
+ ++NumPHIsInterleaved;
+
+ // Create the new PHI (left uninserted here; it is the function's result).
+ auto *NewPN = PHINode::Create(PN.getType(), PN.getNumIncomingValues());
+
+ // Create the new binary op, keeping only flags shared by BO1, BO2 and Rdx.
+ auto *NewOp = BinaryOperator::Create(Opc, NewPN, C);
+ if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
+ // Intersect FMF flags for FADD and FMUL.
+ FastMathFlags Intersect = BO1->getFastMathFlags() &
+ BO2->getFastMathFlags() & Rdx->getFastMathFlags();
+ NewOp->setFastMathFlags(Intersect);
+ } else {
+ OverflowTracking Flags;
+ Flags.AllKnownNonNegative = false;
+ Flags.AllKnownNonZero = false;
+ Flags.mergeFlags(*BO1);
+ Flags.mergeFlags(*BO2);
+ Flags.mergeFlags(*Rdx);
+ Flags.applyFlags(*NewOp);
+ }
+ InsertNewInstWith(NewOp, BO1->getIterator());
+ replaceInstUsesWith(*Rdx, NewOp);
+
+ for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+ auto *V = PN.getIncomingValue(I);
+ auto *BB = PN.getIncomingBlock(I);
+ if (V == Init1) {
+ assert(((PN2->getIncomingValue(0) == Init2 &&
+ PN2->getIncomingBlock(0) == BB) ||
+ (PN2->getIncomingValue(1) == Init2 &&
+ PN2->getIncomingBlock(1) == BB)) &&
+ "Invalid incoming block!");
+ NewPN->addIncoming(Init, BB);
+ } else if (V == BO1) {
+ assert(((PN2->getIncomingValue(0) == BO2 &&
+ PN2->getIncomingBlock(0) == BB) ||
+ (PN2->getIncomingValue(1) == BO2 &&
+ PN2->getIncomingBlock(1) == BB)) &&
+ "Invalid incoming block!");
+ NewPN->addIncoming(NewOp, BB);
+ } else
+ llvm_unreachable("Unexpected incoming value!");
+ }
+
+ // Remove dead instructions. Passing BO1/2 as their own replacement is
+ // intended to replace their remaining uses with poison before erasing them.
+ eraseInstFromFunction(*Rdx);
+ eraseInstFromFunction(*replaceInstUsesWith(*BO1, BO1));
+ eraseInstFromFunction(*replaceInstUsesWith(*BO2, BO2));
+ eraseInstFromFunction(*PN2);
+
+ return NewPN;
+}
+
/// Return true if this phi node is always equal to NonPhiInVal.
/// This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
@@ -1448,6 +1608,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (Instruction *Result = foldPHIArgOpIntoPHI(PN))
return Result;
+ // Try to fold interleaved PHI reductions to a single PHI.
+ if (Instruction *Result = foldPHIReduction(PN))
+ return Result;
+
// If the incoming values are pointer casts of the same original value,
// replace the phi with a single cast iff we can insert a non-PHI instruction.
if (PN.getType()->isPointerTy() &&
diff --git a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
index 2b26fb5c66382..26d3cd56f158f 100644
--- a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
+++ b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
@@ -9,15 +9,12 @@ define i8 @add_reassoc(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -46,15 +43,12 @@ define i8 @add_nuw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add nuw i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -83,15 +77,12 @@ define i8 @add_op1_no_nuw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -120,15 +111,12 @@ define i8 @add_op2_no_nuw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add nuw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -157,15 +145,12 @@ define i8 @add_rdx_no_nuw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add nuw i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add nuw i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -194,15 +179,12 @@ define i8 @add_no_nsw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add nuw nsw i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -231,15 +213,12 @@ define i8 @add_nuw_nsw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add nuw nsw i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add nuw nsw i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add nuw nsw i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add nuw nsw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -268,15 +247,12 @@ define <16 x i8> @add_v16i8(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi <16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add <16 x i8> [[PN]], splat (i8 2)
-; CHECK-NEXT: [[OP2]] = add <16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT: [[PN:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add <16 x i8> [[PN]], splat (i8 5)
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add <16 x i8> [[OP2]], [[OP1]]
; CHECK-NEXT: ret <16 x i8> [[RDX]]
;
entry:
@@ -305,15 +281,12 @@ define <vscale x 16 x i8> @add_nxv16i8(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi <vscale x 16 x i8> [ zeroinitializer, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add <vscale x 16 x i8> [[PN]], splat (i8 2)
-; CHECK-NEXT: [[OP2]] = add <vscale x 16 x i8> [[PN2]], splat (i8 3)
+; CHECK-NEXT: [[PN:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add <vscale x 16 x i8> [[PN]], splat (i8 5)
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add <vscale x 16 x i8> [[OP2]], [[OP1]]
; CHECK-NEXT: ret <vscale x 16 x i8> [[RDX]]
;
entry:
@@ -344,15 +317,12 @@ define i8 @mul_reassoc(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = mul i8 [[PN]], 3
-; CHECK-NEXT: [[OP2]] = shl i8 [[PN2]], 2
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = mul i8 [[PN]], 12
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -381,15 +351,12 @@ define i8 @mul_reassoc_no_nuw_no_nsw(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = mul nuw nsw i8 [[PN]], 3
-; CHECK-NEXT: [[OP2]] = shl nuw nsw i8 [[PN2]], 2
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = mul i8 [[PN]], 12
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = mul nuw nsw i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -515,15 +482,12 @@ define i8 @xor_reassoc(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = xor i8 [[PN]], 3
-; CHECK-NEXT: [[OP2]] = xor i8 [[PN2]], 7
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = xor i8 [[PN]], 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = xor i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -552,15 +516,12 @@ define float @fadd_reassoc_nsz(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = fadd reassoc nsz float [[PN]], 2.000000e+00
-; CHECK-NEXT: [[OP2]] = fadd reassoc nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = fadd reassoc nsz float [[PN]], 5.000000e+00
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = fadd reassoc nsz float [[OP2]], [[OP1]]
; CHECK-NEXT: ret float [[RDX]]
;
entry:
@@ -589,15 +550,12 @@ define float @fmul_reassoc_nsz(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = fmul reassoc nsz float [[PN]], 3.000000e+00
-; CHECK-NEXT: [[OP2]] = fmul reassoc nsz float [[PN2]], 4.000000e+00
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = fmul reassoc nsz float [[PN]], 1.200000e+01
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = fmul reassoc nsz float [[OP2]], [[OP1]]
; CHECK-NEXT: ret float [[RDX]]
;
entry:
@@ -702,15 +660,12 @@ define i8 @add_op1_commuted(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -739,15 +694,12 @@ define i8 @add_op2_commuted(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -776,15 +728,12 @@ define i8 @add_rdx_commuted(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -813,15 +762,12 @@ define i8 @add_pn_commuted(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -850,15 +796,12 @@ define i8 @add_pn2_commuted(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[OP1]] = add i8 [[PN]], 2
-; CHECK-NEXT: [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[RDX]] = add i8 [[PN]], 5
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RDX:%.*]] = add i8 [[OP1]], [[OP2]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
>From 61f06ceaf231b249f665819535d8762355d50e7d Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 12 Jun 2025 09:50:47 -0700
Subject: [PATCH 3/3] Use matchSimpleRecurrence.
---
.../Transforms/InstCombine/InstCombinePHI.cpp | 57 +++++++------------
.../InstCombine/phi-reduction-chain.ll | 16 ++++--
2 files changed, 34 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 7fecb213cb0f6..0995f1d70a27a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1004,30 +1004,22 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
///
/// For now, we require init1, init2, constant1 and constant2 to be constants.
Instruction *InstCombinerImpl::foldPHIReduction(PHINode &PN) {
- // For now, only handle PHIs with one use and exactly two incoming values.
- if (!PN.hasOneUse() || PN.getNumIncomingValues() != 2)
- return nullptr;
-
- // Find the binop that uses PN and ensure it can be reassociated.
- auto *BO1 = dyn_cast<BinaryOperator>(PN.user_back());
- if (!BO1 || !BO1->hasNUses(2) || !BO1->isAssociative())
- return nullptr;
+ BinaryOperator *BO1;
+ Value *Start1;
+ Value *Step1;
- // Ensure PN has an incoming value for BO1.
- if (PN.getIncomingValue(0) != BO1 && PN.getIncomingValue(1) != BO1)
+ // Find the first recurrence.
+ if (!PN.hasOneUse() || !matchSimpleRecurrence(&PN, BO1, Start1, Step1))
return nullptr;
- // Find the initial value of PN.
- auto *Init1 =
- dyn_cast<Constant>(PN.getIncomingValue(PN.getIncomingValue(0) == BO1));
- if (!Init1)
+ // Ensure BO1 has two uses (PN and the reduction op) and can be reassociated.
+ if (!BO1->hasNUses(2) || !BO1->isAssociative())
return nullptr;
- // Find the constant operand of BO1.
- assert((BO1->getOperand(0) == &PN || BO1->getOperand(1) == &PN) &&
- "Unexpected operand!");
- auto *C1 = dyn_cast<Constant>(BO1->getOperand(BO1->getOperand(0) == &PN));
- if (!C1)
+ // Convert Start1 and Step1 to constants.
+ auto *Init1 = dyn_cast<Constant>(Start1);
+ auto *C1 = dyn_cast<Constant>(Step1);
+ if (!Init1 || !C1)
return nullptr;
// Find the reduction operation.
@@ -1050,26 +1042,21 @@ Instruction *InstCombinerImpl::foldPHIReduction(PHINode &PN) {
BO2->getOpcode() != Opc || BO2->getParent() != BO1->getParent())
return nullptr;
- // Find the interleaved PHI and recurrence constant.
- auto *PN2 = dyn_cast<PHINode>(BO2->getOperand(0));
- auto *C2 = dyn_cast<Constant>(BO2->getOperand(1));
- if (!PN2 && !C2) {
- PN2 = dyn_cast<PHINode>(BO2->getOperand(1));
- C2 = dyn_cast<Constant>(BO2->getOperand(0));
- }
- if (!PN2 || !C2 || !PN2->hasOneUse() || PN2->getParent() != PN.getParent())
+ // Find the interleaved PHI and recurrence constants.
+ PHINode *PN2;
+ Value *Start2;
+ Value *Step2;
+ if (!matchSimpleRecurrence(BO2, PN2, Start2, Step2) || !PN2->hasOneUse() ||
+ PN2->getParent() != PN.getParent())
return nullptr;
+
assert(PN2->getNumIncomingValues() == PN.getNumIncomingValues() &&
"Expected PHIs with the same number of incoming values!");
- // Ensure PN2 has an incoming value for BO2.
- if (PN2->getIncomingValue(0) != BO2 && PN2->getIncomingValue(1) != BO2)
- return nullptr;
-
- // Find the initial value of PN2.
- auto *Init2 = dyn_cast<Constant>(
- PN2->getIncomingValue(PN2->getIncomingValue(0) == BO2));
- if (!Init2)
+ // Convert Start2 and Step2 to constants.
+ auto *Init2 = dyn_cast<Constant>(Start2);
+ auto *C2 = dyn_cast<Constant>(Step2);
+ if (!Init2 || !C2)
return nullptr;
assert(BO1->isCommutative() && BO2->isCommutative() && Rdx->isCommutative() &&
diff --git a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
index 26d3cd56f158f..6ef24e0b0f59f 100644
--- a/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
+++ b/llvm/test/Transforms/InstCombine/phi-reduction-chain.ll
@@ -475,6 +475,7 @@ exit:
}
; Reassociate xor.
+; FIXME: Not currently matched by matchSimpleRecurrence.
define i8 @xor_reassoc(i32 %n) {
; CHECK-LABEL: define i8 @xor_reassoc(
; CHECK-SAME: i32 [[N:%.*]]) {
@@ -482,12 +483,15 @@ define i8 @xor_reassoc(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[RDX]] = xor i8 [[PN]], 4
+; CHECK-NEXT: [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = xor i8 [[PN]], 3
+; CHECK-NEXT: [[OP2]] = xor i8 [[PN2]], 7
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = xor i8 [[OP2]], [[OP1]]
; CHECK-NEXT: ret i8 [[RDX]]
;
entry:
@@ -509,6 +513,7 @@ exit:
}
; Reassociate fadd if reassoc and nsz are present on all instructions.
+; FIXME: Not currently matched by matchSimpleRecurrence.
define float @fadd_reassoc_nsz(i32 %n) {
; CHECK-LABEL: define float @fadd_reassoc_nsz(
; CHECK-SAME: i32 [[N:%.*]]) {
@@ -516,12 +521,15 @@ define float @fadd_reassoc_nsz(i32 %n) {
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[PN:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
-; CHECK-NEXT: [[RDX]] = fadd reassoc nsz float [[PN]], 5.000000e+00
+; CHECK-NEXT: [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT: [[OP1]] = fadd reassoc nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT: [[OP2]] = fadd reassoc nsz float [[PN2]], 3.000000e+00
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX:%.*]] = fadd reassoc nsz float [[OP2]], [[OP1]]
; CHECK-NEXT: ret float [[RDX]]
;
entry:
More information about the llvm-commits mailing list