[llvm] 8698d56 - [Transforms][LICM] Add the ability to undo unprofitable reassociation

Paul Osmialowski via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 08:42:56 PDT 2023


Author: Paul Osmialowski
Date: 2023-08-01T16:42:01+01:00
New Revision: 8698d56d996a72b6ea87d751ec644c9c611717e2

URL: https://github.com/llvm/llvm-project/commit/8698d56d996a72b6ea87d751ec644c9c611717e2
DIFF: https://github.com/llvm/llvm-project/commit/8698d56d996a72b6ea87d751ec644c9c611717e2.diff

LOG: [Transforms][LICM] Add the ability to undo unprofitable reassociation

Consider the following piece of code:

```
void innermost_loop(int i, double d1, double d2, double delta, int n, double cells[n])
{
  int j;
  const double d1d = d1 * delta;
  const double d2d = d2 * delta;

  for (j = 0; j <= i; j++)
    cells[j] = d1d * cells[j + 1] + d2d * cells[j];
}
```

When compiling at -Ofast level, after the "Reassociate expressions"
pass, this code is transformed into an equivalent of:

```
  int j;

  for (j = 0; j <= i; j++)
    cells[j] = (d1 * cells[j + 1] + d2 * cells[j]) * delta;
```

Effectively, the computation of those loop invariants isn't done
before the loop anymore; instead, we have one extra multiplication on
each loop iteration. Sadly, this results in a significant performance
hit.

Similarly, user code that is already written in this factored form
cannot have those invariants hoisted.

This patch solves this issue by adding the ability to undo such
reassociation to the LICM pass. Note that to perform this
transformation the pass requires the same conditions as the
"Reassociate expressions" pass, namely, the involved binary operators
must have reassociation allowed (e.g. by specifying the `fast`
attribute) and they must have a single use only.

Some parts of this patch were suggested by Nikita Popov.

Reviewed By: huntergr, nikic, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D152281

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/test/Transforms/LICM/expr-reassociate.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index f8fab03f151d28..5ef58ca811110c 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,8 @@ STATISTIC(NumGEPsHoisted,
           "Number of geps reassociated and hoisted out of the loop");
 STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
                             "and hoisted out of the loop");
+STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
+                                    "reassociated and hoisted out of the loop");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -127,6 +129,12 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
     cl::desc("Max num uses visited for identifying load "
              "invariance in loop using invariant start (default = 8)"));
 
+cl::opt<unsigned> FPAssociationUpperLimit(
+    "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
+    cl::desc(
+        "Set upper limit for the number of transformations performed "
+        "during a single round of hoisting the reassociated expressions."));
+
 // Experimental option to allow imprecision in LICM in pathological cases, in
 // exchange for faster compile. This is to be removed if MemorySSA starts to
 // address the same issue. LICM calls MemorySSAWalker's
@@ -2674,6 +2682,72 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
   return false;
 }
 
+/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
+/// A1, A2, ... and C are loop invariants into expressions like
+/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
+/// invariant expressions. This function returns true only if any hoisting has
+/// actually occurred.
+static bool hoistFPAssociation(Instruction &I, Loop &L,
+                               ICFLoopSafetyInfo &SafetyInfo,
+                               MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+                               DominatorTree *DT) {
+  using namespace PatternMatch;
+  Value *VariantOp = nullptr, *InvariantOp = nullptr;
+
+  if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) ||
+      !I.hasAllowReassoc())
+    return false;
+  if (L.isLoopInvariant(VariantOp))
+    std::swap(VariantOp, InvariantOp); // Keep the invariant in InvariantOp.
+  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+    return false;
+  Value *Factor = InvariantOp;
+
+  // First, walk the fadd/fmul tree to make sure the transformation applies.
+  SmallVector<Use *> Changes;
+  SmallVector<BinaryOperator *> Worklist;
+  if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
+    Worklist.push_back(VariantBinOp);
+  while (!Worklist.empty()) {
+    BinaryOperator *BO = Worklist.pop_back_val();
+    if (!BO->hasOneUse() || !BO->hasAllowReassoc())
+      return false;
+    BinaryOperator *Op0, *Op1;
+    if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) {
+      Worklist.push_back(Op0);
+      Worklist.push_back(Op1);
+      continue;
+    }
+    if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO))
+      return false;
+    Use &U0 = BO->getOperandUse(0);
+    Use &U1 = BO->getOperandUse(1);
+    if (L.isLoopInvariant(U0))
+      Changes.push_back(&U0);
+    else if (L.isLoopInvariant(U1))
+      Changes.push_back(&U1);
+    else
+      return false;
+    if (Changes.size() > FPAssociationUpperLimit)
+      return false;
+  }
+  if (Changes.empty())
+    return false;
+
+  // Checks passed: scale each recorded operand by Factor in the preheader.
+  auto *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplify form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+  for (auto *U : Changes) {
+    assert(L.isLoopInvariant(U->get()));
+    Instruction *Ins = cast<Instruction>(U->getUser());
+    U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"));
+  }
+  I.replaceAllUsesWith(VariantOp);
+  eraseInstruction(I, SafetyInfo, MSSAU);
+  return true;
+}
+
 static bool hoistArithmetics(Instruction &I, Loop &L,
                              ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2701,6 +2775,12 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
     return true;
   }
 
+  if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+    ++NumHoisted;
+    ++NumFPAssociationsHoisted;
+    return true;
+  }
+
   return false;
 }
 

diff  --git a/llvm/test/Transforms/LICM/expr-reassociate.ll b/llvm/test/Transforms/LICM/expr-reassociate.ll
index cd9dcf98147669..127ed19c7808de 100644
--- a/llvm/test/Transforms/LICM/expr-reassociate.ll
+++ b/llvm/test/Transforms/LICM/expr-reassociate.ll
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes='reassociate' -S < %s | FileCheck %s --check-prefix=REASSOCIATE_ONLY
 ; RUN: opt -passes='licm' -S < %s | FileCheck %s --check-prefix=LICM_ONLY
+; RUN: opt -passes='licm' -licm-max-num-fp-reassociations=1 -S < %s | FileCheck %s --check-prefix=LICM_ONLY_CONSTRAINED
 ; RUN: opt -passes='reassociate,loop-mssa(licm)' -S < %s | FileCheck %s --check-prefix=LICM_AFTER_REASSOCIATE
+; RUN: opt -passes='reassociate,loop-mssa(licm)' -licm-max-num-fp-reassociations=1 -S < %s | FileCheck %s --check-prefix=LICM_AFTER_REASSOCIATE_CONSTRAINED
 
 ;
 ; A simple loop, should not get modified:
@@ -58,6 +60,28 @@ define void @innermost_loop_1d_fast(i32 %i, double %d1, double %delta, ptr %cell
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_1d_fast
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_1d_fast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
@@ -80,6 +104,28 @@ define void @innermost_loop_1d_fast(i32 %i, double %d1, double %delta, ptr %cell
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_1d_fast
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul fast double [[DELTA]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   %fmul.d1 = fmul fast double %d1, %delta
   br label %for.cond
@@ -147,6 +193,7 @@ define void @innermost_loop_1d_shouldhoist_fast(i32 %i, double %d1, double %delt
 ; LICM_ONLY-LABEL: define void @innermost_loop_1d_shouldhoist_fast
 ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_ONLY-NEXT:  entry:
+; LICM_ONLY-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]]
 ; LICM_ONLY-NEXT:    br label [[FOR_COND:%.*]]
 ; LICM_ONLY:       for.cond:
 ; LICM_ONLY-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -157,15 +204,36 @@ define void @innermost_loop_1d_shouldhoist_fast(i32 %i, double %d1, double %delt
 ; LICM_ONLY-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_ONLY-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_ONLY-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_ONLY-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[D1]], [[CELL_1]]
-; LICM_ONLY-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[FMUL_1]], [[DELTA]]
+; LICM_ONLY-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FACTOR_OP_FMUL]], [[CELL_1]]
 ; LICM_ONLY-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_ONLY-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
-; LICM_ONLY-NEXT:    store double [[FMUL_2]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY-NEXT:    store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8
 ; LICM_ONLY-NEXT:    br label [[FOR_COND]]
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_1d_shouldhoist_fast
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FACTOR_OP_FMUL]], [[CELL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_1d_shouldhoist_fast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
@@ -188,6 +256,28 @@ define void @innermost_loop_1d_shouldhoist_fast(i32 %i, double %d1, double %delt
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_1d_shouldhoist_fast
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[DELTA]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[FMUL_1]], [[CELL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[FMUL_2]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   br label %for.cond
 
@@ -284,9 +374,37 @@ define void @innermost_loop_2d_fast(i32 %i, double %d1, double %d2, double %delt
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D2:%.*]] = fmul fast double [[D2]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[FMUL_D2]], [[CELL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FADD_1:%.*]] = fadd fast double [[FMUL_1]], [[FMUL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND:%.*]]
 ; LICM_AFTER_REASSOCIATE:       for.cond:
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -297,18 +415,42 @@ define void @innermost_loop_2d_fast(i32 %i, double %d1, double %d2, double %delt
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
-; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
-; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND]]
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   %fmul.d1 = fmul fast double %d1, %delta
   %fmul.d2 = fmul fast double %d2, %delta
@@ -424,9 +566,45 @@ define void @innermost_loop_3d_fast(i32 %i, double %d1, double %d2, double %d3,
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D2:%.*]] = fmul fast double [[D2]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D3:%.*]] = fmul fast double [[D3]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[FMUL_D2]], [[CELL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FADD_1:%.*]] = fadd fast double [[FMUL_1]], [[FMUL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_3:%.*]] = fmul fast double [[FMUL_D3]], [[CELL_3]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FADD_2:%.*]] = fadd fast double [[FADD_1]], [[FMUL_3]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[FADD_2]], ptr [[ARRAYIDX_J_2]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D3]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL2:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL3:%.*]] = fmul fast double [[D2]], [[DELTA]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND:%.*]]
 ; LICM_AFTER_REASSOCIATE:       for.cond:
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -437,24 +615,54 @@ define void @innermost_loop_3d_fast(i32 %i, double %d1, double %d2, double %d3,
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL2]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL3]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[FACTOR_OP_FMUL]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]]
-; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]]
-; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8
+; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_ADD1]], ptr [[ARRAYIDX_J_2]], align 8
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND]]
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   %fmul.d1 = fmul fast double %d1, %delta
   %fmul.d2 = fmul fast double %d2, %delta
@@ -547,6 +755,32 @@ define void @innermost_loop_2d_nofast(i32 %i, double %d1, double %d2, double %de
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul double [[D1]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_D2:%.*]] = fmul double [[D2]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul double [[FMUL_D1]], [[CELL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul double [[FMUL_D2]], [[CELL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FADD_1:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_nofast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
@@ -573,6 +807,32 @@ define void @innermost_loop_2d_nofast(i32 %i, double %d1, double %d2, double %de
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_D1:%.*]] = fmul double [[D1]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_D2:%.*]] = fmul double [[D2]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul double [[FMUL_D1]], [[CELL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul double [[FMUL_D2]], [[CELL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FADD_1:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   %fmul.d1 = fmul double %d1, %delta
   %fmul.d2 = fmul double %d2, %delta
@@ -648,6 +908,8 @@ define void @innermost_loop_2d_fast_reassociated(i32 %i, double %d1, double %d2,
 ; LICM_ONLY-LABEL: define void @innermost_loop_2d_fast_reassociated
 ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_ONLY-NEXT:  entry:
+; LICM_ONLY-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_ONLY-NEXT:    [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]]
 ; LICM_ONLY-NEXT:    br label [[FOR_COND:%.*]]
 ; LICM_ONLY:       for.cond:
 ; LICM_ONLY-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -658,21 +920,47 @@ define void @innermost_loop_2d_fast_reassociated(i32 %i, double %d1, double %d2,
 ; LICM_ONLY-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_ONLY-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_ONLY-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_ONLY-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_ONLY-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]]
 ; LICM_ONLY-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_ONLY-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; LICM_ONLY-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
-; LICM_ONLY-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_ONLY-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]]
 ; LICM_ONLY-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
-; LICM_ONLY-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
-; LICM_ONLY-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY-NEXT:    store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8
 ; LICM_ONLY-NEXT:    br label [[FOR_COND]]
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast_reassociated
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast_reassociated
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND:%.*]]
 ; LICM_AFTER_REASSOCIATE:       for.cond:
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -683,18 +971,42 @@ define void @innermost_loop_2d_fast_reassociated(i32 %i, double %d1, double %d2,
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]]
 ; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
-; LICM_AFTER_REASSOCIATE-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
-; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE-NEXT:    store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8
 ; LICM_AFTER_REASSOCIATE-NEXT:    br label [[FOR_COND]]
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast_reassociated
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   br label %for.cond
 
@@ -777,6 +1089,31 @@ define void @innermost_loop_2d_nofast_reassociated(i32 %i, double %d1, double %d
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast_reassociated
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul double [[CELL_1]], [[D1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul double [[CELL_2]], [[D2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd double [[FMUL_2]], [[FMUL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul double [[REASS_ADD]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_nofast_reassociated
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
@@ -802,6 +1139,31 @@ define void @innermost_loop_2d_nofast_reassociated(i32 %i, double %d1, double %d
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast_reassociated
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul double [[D1]], [[CELL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul double [[D2]], [[CELL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul double [[DELTA]], [[REASS_ADD]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   br label %for.cond
 
@@ -905,6 +1267,37 @@ define void @innermost_loop_3d_fast_reassociated_different(i32 %i, double %d1, d
 ; LICM_ONLY:       for.end:
 ; LICM_ONLY-NEXT:    ret void
 ;
+; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast_reassociated_different
+; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_ONLY_CONSTRAINED-NEXT:  entry:
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_ONLY_CONSTRAINED:       for.cond:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_ONLY_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_ONLY_CONSTRAINED:       for.body:
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_ONLY_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[CELL_4:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_4]], [[D2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[EXTRA_MUL:%.*]] = fmul fast double [[CELL_3]], [[CELL_2]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[EXTRA_ADD:%.*]] = fadd fast double [[EXTRA_MUL]], [[REASS_ADD]]
+; LICM_ONLY_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[EXTRA_ADD]], [[DELTA]]
+; LICM_ONLY_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_ONLY_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_ONLY_CONSTRAINED:       for.end:
+; LICM_ONLY_CONSTRAINED-NEXT:    ret void
+;
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast_reassociated_different
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT:  entry:
@@ -936,6 +1329,37 @@ define void @innermost_loop_3d_fast_reassociated_different(i32 %i, double %d1, d
 ; LICM_AFTER_REASSOCIATE:       for.end:
 ; LICM_AFTER_REASSOCIATE-NEXT:    ret void
 ;
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast_reassociated_different
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:  entry:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND:%.*]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.cond:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.body:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ADD_J_1]] = add nuw nsw i32 [[J]], 1
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_1]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[CELL_4:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[FMUL_2:%.*]] = fmul fast double [[CELL_4]], [[D2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[EXTRA_MUL:%.*]] = fmul fast double [[CELL_3]], [[CELL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = fadd fast double [[EXTRA_MUL]], [[FMUL_1]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[EXTRA_ADD:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_2]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = fmul fast double [[EXTRA_ADD]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    br label [[FOR_COND]]
+; LICM_AFTER_REASSOCIATE_CONSTRAINED:       for.end:
+; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT:    ret void
+;
 entry:
   br label %for.cond
 


        


More information about the llvm-commits mailing list