[llvm] d9715a7 - [SCEV] Don't try to reuse expressions with offset
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 25 00:20:43 PST 2022
Author: Nikita Popov
Date: 2022-02-25T09:16:48+01:00
New Revision: d9715a726674046b177221873d63578dce383feb
URL: https://github.com/llvm/llvm-project/commit/d9715a726674046b177221873d63578dce383feb
DIFF: https://github.com/llvm/llvm-project/commit/d9715a726674046b177221873d63578dce383feb.diff
LOG: [SCEV] Don't try to reuse expressions with offset
SCEV's ExprValueMap currently tracks not only which IR Values
correspond to a given SCEV expression, but additionally stores that
it may be expanded in the form X+Offset. In theory, this allows
reusing existing IR Values in more cases.
In practice, this doesn't seem to be particularly useful (the test
changes are rather underwhelming) and adds a good bit of complexity.
Per https://github.com/llvm/llvm-project/issues/53905, we have an
invalidation issue with these offset expressions.
Differential Revision: https://reviews.llvm.org/D120311
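For context, the doc comment deleted below spelled the scheme out: if
S1 = S2 + C_a has already been expanded to the IR value V1, then
S3 = S2 + C_b could be emitted as V1 - C_a + C_b instead of expanding S2
again. A minimal standalone C++ sketch of that arithmetic, with plain
integers standing in for SCEVs and IR values (illustrative only, not
LLVM code):

#include <cassert>

int main() {
  // Pretend S2 is some complex subexpression whose value is:
  int S2 = 42;
  const int C_a = 10, C_b = 20;

  int V1 = S2 + C_a; // S1 = S2 + C_a, already materialized as V1.

  // Old behavior: reuse V1 to build S3 = S2 + C_b without re-expanding S2.
  int S3_reused = V1 - C_a + C_b;

  // After this patch, S3 is simply expanded literally; the {V1, C_a}
  // offset entry that enabled the reuse is no longer recorded.
  int S3_literal = S2 + C_b;
  assert(S3_reused == S3_literal);
}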
Added:
Modified:
llvm/include/llvm/Analysis/ScalarEvolution.h
llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 768925433bed..61659bfbd126 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1246,30 +1246,11 @@ class ScalarEvolution {
HasRecMapType HasRecMap;
/// The type for ExprValueMap.
- using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
- using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
- using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
+ using ValueSetVector = SmallSetVector<Value *, 4>;
+ using ExprValueMapType = DenseMap<const SCEV *, ValueSetVector>;
/// ExprValueMap -- This map records the original values from which
/// the SCEV expr is generated from.
- ///
- /// We want to represent the mapping as SCEV -> ValueOffsetPair instead
- /// of SCEV -> Value:
- /// Suppose we know S1 expands to V1, and
- /// S1 = S2 + C_a
- /// S3 = S2 + C_b
- /// where C_a and C_b are different SCEVConstants. Then we'd like to
- /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally.
- /// It is helpful when S2 is a complex SCEV expr.
- ///
- /// In order to do that, we represent ExprValueMap as a mapping from
- /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and
- /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3
- /// is expanded, it will first expand S2 to V1 - C_a because of
- /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b.
- ///
- /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded
- /// to V - Offset.
ExprValueMapType ExprValueMap;
/// The type for ValueExprMap.
@@ -1300,7 +1281,7 @@ class ScalarEvolution {
DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
/// Return the Value set from which the SCEV expr is generated.
- ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
+ ValueSetVector *getSCEVValues(const SCEV *S);
/// Private helper method for the GetMinTrailingZeros method
uint32_t GetMinTrailingZerosImpl(const SCEV *S);
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 60b772b94a6f..5a9ed598e099 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -385,8 +385,8 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// Note that this function does not perform an exhaustive search. I.e if it
/// didn't find any value it does not mean that there is no such value.
///
- Optional<ScalarEvolution::ValueOffsetPair>
- getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
+ Value *getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L);
/// Returns a suitable insert point after \p I, that dominates \p
/// MustDominate. Skips instructions inserted by the expander.
@@ -444,8 +444,7 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
/// Find a previous Value in ExprValueMap for expand.
- ScalarEvolution::ValueOffsetPair
- FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+ Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
Value *expand(const SCEV *S);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24fb780d0f7e..05ee7da88609 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4278,27 +4278,9 @@ bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
return FoundAddRec;
}
-/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
-/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
-/// offset I, then return {S', I}, else return {\p S, nullptr}.
-static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
- const auto *Add = dyn_cast<SCEVAddExpr>(S);
- if (!Add)
- return {S, nullptr};
-
- if (Add->getNumOperands() != 2)
- return {S, nullptr};
-
- auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
- if (!ConstOp)
- return {S, nullptr};
-
- return {Add->getOperand(1), ConstOp->getValue()};
-}
-
/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
-ScalarEvolution::ValueOffsetPairSetVector *
+ScalarEvolution::ValueSetVector *
ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
@@ -4306,8 +4288,8 @@ ScalarEvolution::getSCEVValues(const SCEV *S) {
#ifndef NDEBUG
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
- for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE.first));
+ for (Value *V : SI->second)
+ assert(ValueExprMap.count(V));
}
#endif
return &SI->second;
@@ -4320,18 +4302,9 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- // Remove {V, 0} from the set of ExprValueMap[S]
+ // Remove V from the set of ExprValueMap[S]
if (auto *SV = getSCEVValues(S))
- SV->remove({V, nullptr});
-
- // Remove {V, Offset} from the set of ExprValueMap[Stripped]
- const SCEV *Stripped;
- ConstantInt *Offset;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- if (Offset != nullptr) {
- if (auto *SV = getSCEVValues(Stripped))
- SV->remove({V, Offset});
- }
+ SV->remove(V);
ValueExprMap.erase(V);
}
}
@@ -4343,7 +4316,7 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- ExprValueMap[S].insert({V, nullptr});
+ ExprValueMap[S].insert(V);
}
}
@@ -4360,23 +4333,8 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
// ValueExprMap before insert S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second) {
- ExprValueMap[S].insert({V, nullptr});
-
- // If S == Stripped + Offset, add Stripped -> {V, Offset} into
- // ExprValueMap.
- const SCEV *Stripped = S;
- ConstantInt *Offset = nullptr;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- // If stripped is SCEVUnknown, don't bother to save
- // Stripped -> {V, offset}. It doesn't simplify and sometimes even
- // increase the complexity of the expansion code.
- // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
- // because it may generate add/sub instead of GEP in SCEV expansion.
- if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
- !isa<GetElementPtrInst>(V))
- ExprValueMap[Stripped].insert({V, Offset});
- }
+ if (Pair.second)
+ ExprValueMap[S].insert(V);
}
return S;
}
@@ -13399,12 +13357,10 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
auto ExprIt = ExprValueMap.find(S);
if (ExprIt != ExprValueMap.end()) {
- for (auto &ValueAndOffset : ExprIt->second) {
- if (ValueAndOffset.second == nullptr) {
- auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
- if (ValueIt != ValueExprMap.end())
- ValueExprMap.erase(ValueIt);
- }
+ for (Value *V : ExprIt->second) {
+ auto ValueIt = ValueExprMap.find_as(V);
+ if (ValueIt != ValueExprMap.end())
+ ValueExprMap.erase(ValueIt);
}
ExprValueMap.erase(ExprIt);
}
@@ -13546,7 +13502,7 @@ void ScalarEvolution::verify() const {
// Check that the value is also part of the reverse map.
auto It = ExprValueMap.find(KV.second);
- if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+ if (It == ExprValueMap.end() || !It->second.contains(KV.first)) {
dbgs() << "Value " << *KV.first
<< " is in ValueExprMap but not in ExprValueMap\n";
std::abort();
@@ -13554,19 +13510,15 @@ void ScalarEvolution::verify() const {
}
for (const auto &KV : ExprValueMap) {
- for (const auto &ValueAndOffset : KV.second) {
- if (ValueAndOffset.second != nullptr)
- continue;
-
- auto It = ValueExprMap.find_as(ValueAndOffset.first);
+ for (Value *V : KV.second) {
+ auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
- dbgs() << "Value " << *ValueAndOffset.first
+ dbgs() << "Value " << *V
<< " is in ExprValueMap but not in ValueExprMap\n";
std::abort();
}
if (It->second != KV.first) {
- dbgs() << "Value " << *ValueAndOffset.first
- << " mapped to " << *It->second
+ dbgs() << "Value " << *V << " mapped to " << *It->second
<< " rather than " << *KV.first << "\n";
std::abort();
}
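With the offset entries gone, the verification above can treat
ValueExprMap and ExprValueMap as mutual inverses. A standalone sketch of
that invariant, using std::string stand-ins for Value* and const SCEV*
(simplified types, not the actual LLVM data structures):

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  // ValueExprMap: IR value -> SCEV. ExprValueMap: SCEV -> set of IR values.
  std::map<std::string, std::string> ValueExpr = {{"%v1", "S1"}, {"%v2", "S1"}};
  std::map<std::string, std::set<std::string>> ExprValue = {
      {"S1", {"%v1", "%v2"}}};

  // Forward direction: every value must appear in the set for its SCEV.
  for (const auto &[V, S] : ValueExpr)
    assert(ExprValue[S].count(V));

  // Reverse direction: every recorded value must map back to exactly that
  // SCEV. There is no longer an offset case to skip over.
  for (const auto &[S, Vs] : ExprValue)
    for (const auto &V : Vs)
      assert(ValueExpr[V] == S);
}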
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 754382aa0845..00c5b5053f9e 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1870,9 +1870,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
-ScalarEvolution::ValueOffsetPair
-SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
+Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
auto *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
@@ -1882,9 +1881,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
// Choose a Value from the set which dominates the InsertPt.
// InsertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
- for (auto const &VOPair : *Set) {
- Value *V = VOPair.first;
- ConstantInt *Offset = VOPair.second;
+ for (Value *V : *Set) {
Instruction *EntInst = dyn_cast_or_null<Instruction>(V);
if (!EntInst)
continue;
@@ -1894,11 +1891,11 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
- return {V, Offset};
+ return V;
}
}
}
- return {nullptr, nullptr};
+ return nullptr;
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1967,9 +1964,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
- Value *V = VO.first;
-
+ Value *V = FindValueInExprValueMap(S, InsertPt);
if (!V)
V = visit(S);
else {
@@ -1980,21 +1975,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (auto *I = dyn_cast<Instruction>(V))
if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
I->dropPoisonGeneratingFlags();
-
- if (VO.second) {
- if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- int64_t Offset = VO.second->getSExtValue();
- ConstantInt *Idx =
- ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
- } else {
- V = Builder.CreateSub(V, VO.second);
- }
- }
}
// Remember the expanded value for this SCEV at this location.
//
@@ -2176,9 +2156,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Optional<ScalarEvolution::ValueOffsetPair>
-SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
- Loop *L) {
+Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -2195,25 +2175,17 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
+ return LHS;
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
+ return RHS;
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap. Note that we don't currently model the cost of
// needing to drop poison generating flags on the instruction if we
// want to reuse it. We effectively assume that has zero cost.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
- if (VO.first)
- return VO;
-
- // There is potential to make this significantly smarter, but this simple
- // heuristic already gets some interesting cases.
-
- // Can not find suitable value.
- return None;
+ return FindValueInExprValueMap(S, At);
}
template<typename T> static InstructionCost costAndCollectOperands(
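The knock-on change for callers is mechanical: instead of unpacking an
Optional<ValueOffsetPair>, they null-check a plain Value pointer. A
standalone before/after sketch; the function and type names here are
hypothetical stand-ins, not the real LLVM classes:

#include <cstdio>
#include <optional>
#include <utility>

struct Value { const char *Name; };

// Old shape: Optional<ValueOffsetPair>, i.e. a (value, offset) pair.
std::optional<std::pair<Value *, int>> findExpansionOld(Value *V) {
  return std::make_pair(V, 0); // An offset of 0 stood for an exact match.
}

// New shape: a nullable pointer suffices once offsets are not tracked.
Value *findExpansionNew(Value *V) { return V; }

int main() {
  Value V{"%v1"};
  if (auto VO = findExpansionOld(&V)) // old: unpack the pair, mind the offset
    std::printf("old: reuse %s, offset %d\n", VO->first->Name, VO->second);
  if (Value *Existing = findExpansionNew(&V)) // new: a plain null-check
    std::printf("new: reuse %s\n", Existing->Name);
}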
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index 75439f811860..443d89aaeaed 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -115,55 +115,57 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl (%rax), %ecx
-; CHECK-NEXT: andl $511, %ecx # imm = 0x1FF
-; CHECK-NEXT: leaq 1(%rcx), %r13
-; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl %r13d, (%rax)
-; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %edx
-; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: andl $511, %eax # imm = 0x1FF
+; CHECK-NEXT: leaq 1(%rax), %rsi
+; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl %esi, (%rcx)
+; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl (%rcx), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
-; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movq (%rax), %r12
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: leaq 8(,%rax,8), %r14
-; CHECK-NEXT: imulq %r13, %r14
+; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdi
+; CHECK-NEXT: movq (%rdi), %r12
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: leaq 8(,%rdi,8), %r14
+; CHECK-NEXT: imulq %rsi, %r14
; CHECK-NEXT: addq %r12, %r14
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r15
-; CHECK-NEXT: movl (%r15), %eax
-; CHECK-NEXT: leal 8(,%rcx,8), %ecx
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 8(%r12), %rcx
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 32(%r12), %rbx
-; CHECK-NEXT: shlq $3, %r13
-; CHECK-NEXT: xorl %esi, %esi
-; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movq %r12, %rdi
+; CHECK-NEXT: movl (%r15), %ebx
+; CHECK-NEXT: leal 8(,%rax,8), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 8(%r12), %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 32(%r12), %rax
+; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
+; CHECK-NEXT: leaq 8(,%rdx,8), %r13
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rdx
+; CHECK-NEXT: movq %r12, %rsi
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl %eax, (%r15)
+; CHECK-NEXT: movl %ebx, (%r15)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %r13, %rdi
-; CHECK-NEXT: incq %rsi
-; CHECK-NEXT: addq %r13, %rbx
-; CHECK-NEXT: incl %edx
+; CHECK-NEXT: addq %r13, %rsi
+; CHECK-NEXT: incq %rdi
+; CHECK-NEXT: addq %r13, %rax
+; CHECK-NEXT: incl %ecx
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
-; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: testl %ebx, %ebx
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movslq %eax, %r9
+; CHECK-NEXT: movslq %ebx, %r9
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: cmovnsq %r9, %rbp
@@ -178,76 +180,76 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: je .LBB1_14
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; CHECK-NEXT: imulq %rsi, %r10
-; CHECK-NEXT: leaq (%r12,%r10), %rax
-; CHECK-NEXT: leaq (%rax,%r9,8), %rax
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; CHECK-NEXT: imulq %rdi, %r11
+; CHECK-NEXT: leaq (%r12,%r11), %rbx
+; CHECK-NEXT: leaq (%rbx,%r9,8), %rbx
; CHECK-NEXT: testq %r9, %r9
-; CHECK-NEXT: movq $-1, %r11
-; CHECK-NEXT: cmovnsq %r9, %r11
-; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq $-1, %r10
+; CHECK-NEXT: cmovnsq %r9, %r10
+; CHECK-NEXT: cmpq %rdx, %rbx
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; CHECK-NEXT: leaq (%r10,%r11,8), %rax
-; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; CHECK-NEXT: leaq (%r11,%r10,8), %rbx
+; CHECK-NEXT: cmpq %rdx, %rbx
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: leaq -4(%r8), %rax
-; CHECK-NEXT: movq %rax, %r10
-; CHECK-NEXT: shrq $2, %r10
-; CHECK-NEXT: btl $2, %eax
+; CHECK-NEXT: leaq -4(%r8), %rbx
+; CHECK-NEXT: movq %rbx, %r11
+; CHECK-NEXT: shrq $2, %r11
+; CHECK-NEXT: btl $2, %ebx
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movdqu %xmm0, (%rdi,%r9,8)
-; CHECK-NEXT: movdqu %xmm0, 16(%rdi,%r9,8)
-; CHECK-NEXT: movl $4, %r11d
-; CHECK-NEXT: testq %r10, %r10
+; CHECK-NEXT: movdqu %xmm0, (%rsi,%r9,8)
+; CHECK-NEXT: movdqu %xmm0, 16(%rsi,%r9,8)
+; CHECK-NEXT: movl $4, %r10d
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: xorl %r11d, %r11d
-; CHECK-NEXT: testq %r10, %r10
+; CHECK-NEXT: xorl %r10d, %r10d
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movq %r11, %rax
-; CHECK-NEXT: subq %r8, %rax
-; CHECK-NEXT: addq %r9, %r11
-; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
+; CHECK-NEXT: movq %r10, %rbx
+; CHECK-NEXT: subq %r8, %rbx
+; CHECK-NEXT: addq %r9, %r10
+; CHECK-NEXT: leaq (%rax,%r10,8), %r10
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movdqu %xmm0, -32(%r11)
-; CHECK-NEXT: movdqu %xmm0, -16(%r11)
-; CHECK-NEXT: movdqu %xmm0, (%r11)
-; CHECK-NEXT: movdqu %xmm0, 16(%r11)
-; CHECK-NEXT: addq $64, %r11
-; CHECK-NEXT: addq $8, %rax
+; CHECK-NEXT: movdqu %xmm0, -32(%r10)
+; CHECK-NEXT: movdqu %xmm0, -16(%r10)
+; CHECK-NEXT: movdqu %xmm0, (%r10)
+; CHECK-NEXT: movdqu %xmm0, 16(%r10)
+; CHECK-NEXT: addq $64, %r10
+; CHECK-NEXT: addq $8, %rbx
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %r8, %r9
; CHECK-NEXT: cmpq %r8, %rbp
-; CHECK-NEXT: movq %r9, %rax
+; CHECK-NEXT: movq %r9, %rbx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movq (%rcx), %rax
-; CHECK-NEXT: movq %rax, (%rdi,%r9,8)
-; CHECK-NEXT: leaq 1(%r9), %rax
+; CHECK-NEXT: movq (%rdx), %rbp
+; CHECK-NEXT: movq %rbp, (%rsi,%r9,8)
+; CHECK-NEXT: leaq 1(%r9), %rbx
; CHECK-NEXT: cmpq $-1, %r9
-; CHECK-NEXT: movq %rax, %r9
+; CHECK-NEXT: movq %rbx, %r9
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
index 69e96d53f220..d60f0c6cd2a1 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
@@ -239,7 +239,7 @@ define dso_local void @NegativeFor64(i32* %ar, i64 %n, i64 %m) #0 {
; CHECK-NEXT: [[MUL3:%.*]] = mul nsw i64 [[M:%.*]], 4
; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[MUL3]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[M]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[CONV]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 4 [[AR1]], i8 0, i32 [[TMP3]], i1 false)
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
index 5d485b82ada7..ed74a17fa7b8 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
@@ -235,7 +235,7 @@ define void @Negative32(i32* %ar, i32 %n, i32 %m) {
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[M]] to i64
; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[CONV2]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[CONV]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[CONV1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[AR1]], i8 0, i64 [[TMP2]], i1 false)