[llvm] 84c849e - [InstCombine] Combine interleaved recurrences. (#143878)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 1 01:54:42 PDT 2025


Author: Ricardo Jesus
Date: 2025-07-01T09:54:38+01:00
New Revision: 84c849e85b0f8ab2d8bbeb2d9c46349f019f4e8e

URL: https://github.com/llvm/llvm-project/commit/84c849e85b0f8ab2d8bbeb2d9c46349f019f4e8e
DIFF: https://github.com/llvm/llvm-project/commit/84c849e85b0f8ab2d8bbeb2d9c46349f019f4e8e.diff

LOG: [InstCombine] Combine interleaved recurrences. (#143878)

Combine sequences such as:
```llvm
  %pn1 = phi [init1, %BB1], [%op1, %BB2]
  %pn2 = phi [init2, %BB1], [%op2, %BB2]
  %op1 = binop %pn1, constant1
  %op2 = binop %pn2, constant2
  %rdx = binop %op1, %op2
```
Into:
```llvm
  %phi_combined = phi [init_combined, %BB1], [%op_combined, %BB2]
  %rdx_combined = binop %phi_combined, constant_combined
```

This allows us to simplify interleaved reductions, for example as
introduced by the loop vectorizer.

The motivating example for this is the loop below:
```c
float foo() {
  float q = 1.f;
  for (int i = 0; i < 1000; ++i)
    q *= .99f;
  return q;
}
```
This currently gets lowered to an explicit loop such as the following (on
AArch64, interleaved by four):
```gas
.LBB0_1:
  fmul    v0.4s, v0.4s, v1.4s
  fmul    v2.4s, v2.4s, v1.4s
  fmul    v3.4s, v3.4s, v1.4s
  fmul    v4.4s, v4.4s, v1.4s
  subs    w8, w8, #32
  b.ne    .LBB0_1
```
With this patch, the loop is instead folded to a constant:
```gas
foo:
  mov     w8, #5028
  movk    w8, #14389, lsl #16
  fmov    s0, w8
  ret
```

Added: 
    llvm/test/Transforms/InstCombine/binop-recurrence.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8c9de862fe8f2..be7645066d6d1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -620,6 +620,20 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN,
                              bool AllowMultipleUses = false);
 
+  /// Try to fold an associative binary operator whose operands are simple
+  /// interleaved recurrences with constant start and step values into a
+  /// single recurrence. This is a common pattern in reduction operations.
+  /// Example:
+  ///   %phi1 = phi [init1, %BB1], [%op1, %BB2]
+  ///   %phi2 = phi [init2, %BB1], [%op2, %BB2]
+  ///   %op1 = binop %phi1, constant1
+  ///   %op2 = binop %phi2, constant2
+  ///   %rdx = binop %op1, %op2
+  /// -->
+  ///   %phi_combined = phi [init_combined, %BB1], [%op_combined, %BB2]
+  ///   %rdx_combined = binop %phi_combined, constant_combined
+  Instruction *foldBinopWithRecurrence(BinaryOperator &BO);
+
   /// For a binary operator with 2 phi operands, try to hoist the binary
   /// operation before the phi. This can result in fewer instructions in
   /// patterns where at least one set of phi operands simplifies.

diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 40578e5edc3ab..806b38874b450 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1989,7 +1989,114 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
   return replaceInstUsesWith(I, NewPN);
 }
 
+Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
+  if (!BO.isAssociative())
+    return nullptr;
+
+  // Find the interleaved binary ops: same opcode and block, two uses each.
+  auto Opc = BO.getOpcode();
+  auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
+  auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
+  if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
+      BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
+      !BO0->isAssociative() || !BO1->isAssociative() ||
+      BO0->getParent() != BO1->getParent())
+    return nullptr;
+
+  assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
+         "Expected commutative instructions!");
+
+  // Find the matching single-use phis in one block, forming the recurrences.
+  PHINode *PN0, *PN1;
+  Value *Start0, *Step0, *Start1, *Step1;
+  if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
+      !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
+      PN0->getParent() != PN1->getParent())
+    return nullptr;
+
+  assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
+         "Expected PHIs with two incoming values!");
+
+  // Only recurrences with constant start and step values are handled.
+  auto *Init0 = dyn_cast<Constant>(Start0);
+  auto *Init1 = dyn_cast<Constant>(Start1);
+  auto *C0 = dyn_cast<Constant>(Step0);
+  auto *C1 = dyn_cast<Constant>(Step1);
+  if (!Init0 || !Init1 || !C0 || !C1)
+    return nullptr;
+
+  // Fold the two starts and the two steps; bail out if either fold fails.
+  auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
+  auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
+  if (!Init || !C)
+    return nullptr;
+
+  // Create the reduced PHI; its incoming values are filled in further down.
+  auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
+                                "reduced.phi");
+
+  // Create the new binary op; flags are derived from BO0, BO1 and BO below.
+  auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
+  if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
+    // FADD/FMUL: keep only the fast-math flags set on all three instructions.
+    FastMathFlags Intersect = BO0->getFastMathFlags() &
+                              BO1->getFastMathFlags() & BO.getFastMathFlags();
+    NewBO->setFastMathFlags(Intersect);
+  } else {
+    OverflowTracking Flags;
+    Flags.AllKnownNonNegative = false;
+    Flags.AllKnownNonZero = false;
+    Flags.mergeFlags(*BO0);
+    Flags.mergeFlags(*BO1);
+    Flags.mergeFlags(BO);
+    Flags.applyFlags(*NewBO);
+  }
+  NewBO->takeName(&BO);
+
+  for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
+    auto *V = PN0->getIncomingValue(I);
+    auto *BB = PN0->getIncomingBlock(I);
+    if (V == Init0) {
+      assert(((PN1->getIncomingValue(0) == Init1 &&
+               PN1->getIncomingBlock(0) == BB) ||
+              (PN1->getIncomingValue(1) == Init1 &&
+               PN1->getIncomingBlock(1) == BB)) &&
+             "Invalid incoming block!");
+      NewPN->addIncoming(Init, BB);
+    } else if (V == BO0) {
+      assert(((PN1->getIncomingValue(0) == BO1 &&
+               PN1->getIncomingBlock(0) == BB) ||
+              (PN1->getIncomingValue(1) == BO1 &&
+               PN1->getIncomingBlock(1) == BB)) &&
+             "Invalid incoming block!");
+      NewPN->addIncoming(NewBO, BB);
+    } else
+      llvm_unreachable("Unexpected incoming value!");
+  }
+
+  LLVM_DEBUG(dbgs() << "  Combined " << *PN0 << "\n           " << *BO0
+                    << "\n      with " << *PN1 << "\n           " << *BO1
+                    << '\n');
+
+  // Insert the new recurrence at PN0 and BO0, then erase the old (dead) ones.
+  InsertNewInstWith(NewPN, PN0->getIterator());
+  InsertNewInstWith(NewBO, BO0->getIterator());
+
+  eraseInstFromFunction(
+      *replaceInstUsesWith(*BO0, PoisonValue::get(BO0->getType())));
+  eraseInstFromFunction(
+      *replaceInstUsesWith(*BO1, PoisonValue::get(BO1->getType())));
+  eraseInstFromFunction(*PN0);
+  eraseInstFromFunction(*PN1);
+
+  return replaceInstUsesWith(BO, NewBO);
+}
+
 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
+  // Attempt to fold binary operators whose operands are simple recurrences.
+  if (auto *NewBO = foldBinopWithRecurrence(BO))
+    return NewBO;
+
   // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
   //       we are guarding against replicating the binop in >1 predecessor.
   //       This could miss matching a phi with 2 constant incoming values.

diff  --git a/llvm/test/Transforms/InstCombine/binop-recurrence.ll b/llvm/test/Transforms/InstCombine/binop-recurrence.ll
new file mode 100644
index 0000000000000..b5eef4e3f516d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/binop-recurrence.ll
@@ -0,0 +1,1408 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Reassociate add.
+define i8 @add_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @add_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, and maintain nuw if all ops have it.
+define i8 @add_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add nuw i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op1 doesn't have it.
+define i8 @add_op1_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if op2 doesn't have it.
+define i8 @add_op2_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nuw if rdx doesn't have it.
+define i8 @add_rdx_no_nuw(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_no_nuw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw i8 %pn, 2
+  %op2 = add nuw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, drop nsw when it appears without nuw, even if all ops have it.
+define i8 @add_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nsw i8 %pn, 2
+  %op2 = add nsw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate add, keep nuw/nsw if all ops have them.
+define i8 @add_nuw_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @add_nuw_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add nuw nsw i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add nuw nsw i8 %pn, 2
+  %op2 = add nuw nsw i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add nuw nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate fixed-length vector operands.
+define <16 x i8> @add_v16i8(i32 %n) {
+; CHECK-LABEL: define <16 x i8> @add_v16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi <16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add <16 x i8> [[REDUCED_PHI]], splat (i8 5)
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret <16 x i8> [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi <16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ]
+  %pn2 = phi <16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+  %op1 = add <16 x i8> %pn, splat (i8 2)
+  %op2 = add <16 x i8> %pn2, splat (i8 3)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add <16 x i8> %op2, %op1
+  ret <16 x i8> %rdx
+}
+
+; Reassociate scalable vector operands.
+define <vscale x 16 x i8> @add_nxv16i8(i32 %n) {
+; CHECK-LABEL: define <vscale x 16 x i8> @add_nxv16i8(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi <vscale x 16 x i8> [ splat (i8 1), %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add <vscale x 16 x i8> [[REDUCED_PHI]], splat (i8 5)
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi <vscale x 16 x i8> [ splat (i8 0), %entry ], [ %op1, %body ]
+  %pn2 = phi <vscale x 16 x i8> [ splat (i8 1), %entry ], [ %op2, %body ]
+  %op1 = add <vscale x 16 x i8> %pn, splat (i8 2)
+  %op2 = add <vscale x 16 x i8> %pn2, splat (i8 3)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add <vscale x 16 x i8> %op2, %op1
+  ret <vscale x 16 x i8> %rdx
+}
+
+; Check other opcodes.
+
+; Reassociate mul.
+define i8 @mul_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = mul i8 [[REDUCED_PHI]], 15
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+  %op1 = mul i8 %pn, 3
+  %op2 = mul i8 %pn2, 5
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate mul; nuw/nsw are not propagated even if all ops have them.
+define i8 @mul_reassoc_no_nuw_no_nsw(i32 %n) {
+; CHECK-LABEL: define i8 @mul_reassoc_no_nuw_no_nsw(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 2, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = mul i8 [[REDUCED_PHI]], 15
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 2, %entry ], [ %op2, %body ]
+  %op1 = mul nuw nsw i8 %pn, 3
+  %op2 = mul nuw nsw i8 %pn2, 5
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul nuw nsw i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate and, although it should already be optimized separately.
+define i8 @and_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @and_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 3
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 31, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 63, %entry ], [ %op2, %body ]
+  %op1 = and i8 %pn, 3
+  %op2 = and i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = and i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate or, although it should already be optimized separately.
+define i8 @or_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @or_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 7
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = or i8 %pn, 3
+  %op2 = or i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = or i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate or and propagate disjoint if all ops have it.
+; Note: The flag can't currently be observed, as the result folds to a constant.
+define i8 @or_disjoint(i32 %n) {
+; CHECK-LABEL: define i8 @or_disjoint(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 7
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = or disjoint i8 %pn, 3
+  %op2 = or disjoint i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = or disjoint i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate xor.
+define i8 @xor_reassoc(i32 %n) {
+; CHECK-LABEL: define i8 @xor_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = xor i8 [[REDUCED_PHI]], 4
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = xor i8 %pn, 3
+  %op2 = xor i8 %pn2, 7
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = xor i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate fadd if reassoc and nsz are present on all instructions.
+define float @fadd_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = fadd reassoc nsz float [[REDUCED_PHI]], 5.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd reassoc nsz float %pn, 2.0
+  %op2 = fadd reassoc nsz float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd reassoc nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Reassociate fmul if reassoc and nsz are present on all instructions.
+define float @fmul_reassoc_nsz(i32 %n) {
+; CHECK-LABEL: define float @fmul_reassoc_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi float [ 2.000000e+00, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = fmul reassoc nsz float [[REDUCED_PHI]], 1.200000e+01
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 1.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 2.0, %entry ], [ %op2, %body ]
+  %op1 = fmul reassoc nsz float %pn, 3.0
+  %op2 = fmul reassoc nsz float %pn2, 4.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fmul reassoc nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Don't reassociate without `reassoc'.
+define float @fadd_no_reassoc(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_reassoc(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fadd nsz float [[PN]], 2.000000e+00
+; CHECK-NEXT:    [[OP2]] = fadd nsz float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fadd nsz float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd nsz float %pn, 2.0
+  %op2 = fadd nsz float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd nsz float %op2, %op1
+  ret float %rdx
+}
+
+; Don't reassociate without `nsz'.
+define float @fadd_no_nsz(i32 %n) {
+; CHECK-LABEL: define float @fadd_no_nsz(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = fadd reassoc float [[PN]], 2.000000e+00
+; CHECK-NEXT:    [[OP2]] = fadd reassoc float [[PN2]], 3.000000e+00
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = fadd reassoc float [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret float [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi float [ 0.0, %entry ], [ %op1, %body ]
+  %pn2 = phi float [ 1.0, %entry ], [ %op2, %body ]
+  %op1 = fadd reassoc float %pn, 2.0
+  %op2 = fadd reassoc float %pn2, 3.0
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = fadd reassoc float %op2, %op1
+  ret float %rdx
+}
+
+; Check commuted operands.
+
+; Reassociate, even if op1 has commuted operands.
+define i8 @add_op1_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op1_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 2, %pn
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if op2 has commuted operands.
+define i8 @add_op2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_op2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 3, %pn2
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if rdx has commuted operands.
+define i8 @add_rdx_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_rdx_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op1, %op2
+  ret i8 %rdx
+}
+
+; Reassociate, even if pn has commuted incoming values.
+define i8 @add_pn_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ %op1, %body ], [ 0, %entry ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Reassociate, even if pn2 has commuted incoming values.
+define i8 @add_pn2_commuted(i32 %n) {
+; CHECK-LABEL: define i8 @add_pn2_commuted(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[REDUCED_PHI:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[RDX:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[RDX]] = add i8 [[REDUCED_PHI]], 5
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ %op2, %body ], [ 1, %entry ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op1, %op2
+  ret i8 %rdx
+}
+
+; Check the instructions have the same opcodes.
+
+; Don't reassociate if the first op doesn't match the rest.
+define i8 @no_mul_add_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_mul_add_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = shl i8 [[PN]], 1
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = mul i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the second op doesn't match the rest.
+define i8 @no_add_mul_add(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_mul_add(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = mul i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = mul i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the third op doesn't match the rest.
+define i8 @no_add_add_mul(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_add_mul(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = mul i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = mul i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check the number of uses of pn, pn2, op1 and op2.
+
+; Don't reassociate if pn has more uses.
+define i8 @no_add_pn_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[PN]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %pn)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if pn2 has more uses.
+define i8 @no_add_pn2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_pn2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[PN2]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %pn2)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op1 has more uses.
+define i8 @no_add_op1_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[OP1]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %op1)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op2 has more uses.
+define i8 @no_add_op2_use(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_use(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    tail call void @use(i8 [[OP2]])
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  tail call void @use(i8 %op2)
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check that init1, init2, c1 and c2 are constants.
+; Note: It should be possible to support non-constant operands, we just don't
+; do so yet.
+
+; Don't reassociate if the initial value of pn isn't constant.
+define i8 @no_add_init1(i8 %init1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init1(
+; CHECK-SAME: i8 [[INIT1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ [[INIT1]], %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ %init1, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the initial value of pn2 isn't constant.
+define i8 @no_add_init2(i8 %init2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_init2(
+; CHECK-SAME: i8 [[INIT2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ [[INIT2]], %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ %init2, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op1 isn't constant.
+define i8 @no_add_c1(i8 %c1, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c1(
+; CHECK-SAME: i8 [[C1:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], [[C1]]
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, %c1
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the recurrence operand of op2 isn't constant.
+define i8 @no_add_c2(i8 %c2, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_c2(
+; CHECK-SAME: i8 [[C2:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], [[C2]]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, %c2
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Check the incoming values of pn and pn2.
+
+; Don't reassociate if op1 doesn't recurse to pn.
+define i8 @no_add_op1_to_pn(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op1_to_pn(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[OP1:%.*]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op2, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if op2 doesn't recurse to pn2.
+define i8 @no_add_op2_to_pn2(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op2_to_pn2(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP1]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[OP2:%.*]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op1, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the phis have more than two incoming values.
+define i8 @no_add_phis_more_incoming_values(i1 %c, i32 %n) {
+; CHECK-LABEL: define i8 @no_add_phis_more_incoming_values(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[C]], label %[[BODY:.*]], label %[[OTHER:.*]]
+; CHECK:       [[OTHER]]:
+; CHECK-NEXT:    br label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[I_NEXT:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ 0, %[[OTHER]] ], [ [[OP1:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ 1, %[[OTHER]] ], [ [[OP2:%.*]], %[[BODY]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br i1 %c, label %body, label %other
+
+other:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ 1, %other ], [ %i.next, %body ]
+  %pn = phi i8 [ 0, %entry ], [ 0, %other ], [ %op1, %body ]
+  %pn2 = phi i8 [ 1, %entry ], [ 1, %other ], [ %op2, %body ]
+  %op1 = add i8 %pn, 2
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+; Don't reassociate if the ops span different blocks.
+define i8 @no_add_op_multi_block(i32 %n) {
+; CHECK-LABEL: define i8 @no_add_op_multi_block(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OTHER:.*]] ]
+; CHECK-NEXT:    [[PN:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[OP1:%.*]], %[[OTHER]] ]
+; CHECK-NEXT:    [[PN2:%.*]] = phi i8 [ 1, %[[ENTRY]] ], [ [[OP2:%.*]], %[[OTHER]] ]
+; CHECK-NEXT:    [[OP1]] = add i8 [[PN]], 2
+; CHECK-NEXT:    br label %[[OTHER]]
+; CHECK:       [[OTHER]]:
+; CHECK-NEXT:    [[OP2]] = add i8 [[PN2]], 3
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RDX:%.*]] = add i8 [[OP2]], [[OP1]]
+; CHECK-NEXT:    ret i8 [[RDX]]
+;
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %other ]
+  %pn = phi i8 [ 0, %entry ], [ %op1, %other ]
+  %pn2 = phi i8 [ 1, %entry ], [ %op2, %other ]
+  %op1 = add i8 %pn, 2
+  br label %other
+
+other:
+  %op2 = add i8 %pn2, 3
+  %i.next = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %n
+  br i1 %cmp, label %exit, label %body
+
+exit:
+  %rdx = add i8 %op2, %op1
+  ret i8 %rdx
+}
+
+declare void @use(i8)


        


More information about the llvm-commits mailing list