[llvm] 4ef272e - [InstCombine] DCE instructions earlier

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 09:46:22 PST 2020


Author: Nikita Popov
Date: 2020-02-27T18:45:59+01:00
New Revision: 4ef272ec9c5f737b8d63a3cc8870d8d63bf0385e

URL: https://github.com/llvm/llvm-project/commit/4ef272ec9c5f737b8d63a3cc8870d8d63bf0385e
DIFF: https://github.com/llvm/llvm-project/commit/4ef272ec9c5f737b8d63a3cc8870d8d63bf0385e.diff

LOG: [InstCombine] DCE instructions earlier

When InstCombine initially populates the worklist, it already
performs constant folding and DCE. However, as the instructions
are initially visited in program order, this DCE can pick up only
the last instruction of a dead chain; the rest would only get
picked up in the main InstCombine run.

To avoid this, we instead perform the DCE in a separate pass over the
collected instructions in reverse order, which will allow us to
pick up full dead instruction chains. We already need to do this
reverse iteration anyway to populate the worklist, so this
shouldn't add extra cost.
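
For illustration, consider a minimal dead chain like the following
(hypothetical IR, not taken from the patch). Visiting in program order,
%a still has a user at the time it is inspected, so only %b is deleted;
visiting in reverse order deletes %b first, which leaves %a trivially
dead as well:

    define i32 @dead_chain_example(i32 %x) {
      %a = add i32 %x, 1    ; only user is %b, so not trivially dead yet
      %b = mul i32 %a, 2    ; no users, trivially dead
      ret i32 %x
    }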

This by itself only fixes a small part of the problem, though: the
same basic issue also applies during the main InstCombine loop.
We generally want DCE to occur as early as possible, because it
allows one-use folds to happen. Address this by also
performing DCE while adding deferred instructions to the main worklist.
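
To illustrate the one-use point (again hypothetical IR, not part of
this patch): once %dead below is erased while draining the deferred
list, %not drops from two uses to one, which is exactly what many
one-use-guarded folds require, so they can fire in the same iteration
instead of waiting for the next one:

    define i32 @one_use_example(i32 %x, i32 %y) {
      %not  = xor i32 %x, -1    ; two users before DCE
      %dead = add i32 %not, 42  ; trivially dead; erased when draining Deferred
      %r    = sub i32 %not, %y  ; %not is now single-use
      ret i32 %r
    }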

This drops the number of tests that perform more than 2 InstCombine
iterations from ~80 to ~40. There are some spurious test changes due
to operand order / icmp predicate toggling.

Differential Revision: https://reviews.llvm.org/D75008

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
    llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
    llvm/test/Transforms/InstCombine/logical-select.ll
    llvm/test/Transforms/InstCombine/pr44245.ll
    llvm/test/Transforms/InstCombine/select-imm-canon.ll
    llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
    llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
    llvm/test/Transforms/InstCombine/vec_sext.ll
    llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index f019bb5ecf39..25aabe199d0f 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -38,7 +38,7 @@ class InstCombineWorklist {
   InstCombineWorklist(InstCombineWorklist &&) = default;
   InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
 
-  bool isEmpty() const { return Worklist.empty(); }
+  bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }
 
   /// Add instruction to the worklist.
   /// Instructions will be visited in the order they are added.
@@ -72,26 +72,15 @@ class InstCombineWorklist {
       push(I);
   }
 
-  void addDeferredInstructions() {
-    for (Instruction *I : reverse(Deferred))
-      push(I);
-    Deferred.clear();
+  Instruction *popDeferred() {
+    if (Deferred.empty())
+      return nullptr;
+    return Deferred.pop_back_val();
   }
 
-  /// AddInitialGroup - Add the specified batch of stuff in reverse order.
-  /// which should only be done when the worklist is empty and when the group
-  /// has no duplicates.
-  void addInitialGroup(ArrayRef<Instruction *> List) {
-    assert(Worklist.empty() && "Worklist must be empty to add initial group");
-    Worklist.reserve(List.size()+16);
-    WorklistMap.reserve(List.size());
-    LLVM_DEBUG(dbgs() << "IC: ADDING: " << List.size()
-                      << " instrs to worklist\n");
-    unsigned Idx = 0;
-    for (Instruction *I : reverse(List)) {
-      WorklistMap.insert(std::make_pair(I, Idx++));
-      Worklist.push_back(I);
-    }
+  void reserve(size_t Size) {
+    Worklist.reserve(Size + 16);
+    WorklistMap.reserve(Size);
   }
 
   /// Remove I from the worklist if it exists.
@@ -107,6 +96,8 @@ class InstCombineWorklist {
   }
 
   Instruction *removeOne() {
+    if (Worklist.empty())
+      return nullptr;
     Instruction *I = Worklist.pop_back_val();
     WorklistMap.erase(I);
     return I;

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f80da4395461..38ba742bf5fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -724,7 +724,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
     if (I.getNumOperands() < 8) {
       for (Use &Operand : I.operands())
         if (auto *Inst = dyn_cast<Instruction>(Operand))
-          Worklist.push(Inst);
+          Worklist.add(Inst);
     }
     Worklist.remove(&I);
     I.eraseFromParent();

diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e25dfdef474d..58d49d87221e 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3406,6 +3406,22 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 
 bool InstCombiner::run() {
   while (!Worklist.isEmpty()) {
+    // Walk deferred instructions in reverse order, and push them to the
+    // worklist, which means they'll end up popped from the worklist in-order.
+    while (Instruction *I = Worklist.popDeferred()) {
+      // Check to see if we can DCE the instruction. We do this already here to
+      // reduce the number of uses and thus allow other folds to trigger.
+      // Note that eraseInstFromFunction() may push additional instructions on
+      // the deferred worklist, so this will DCE whole instruction chains.
+      if (isInstructionTriviallyDead(I, &TLI)) {
+        eraseInstFromFunction(*I);
+        ++NumDeadInst;
+        continue;
+      }
+
+      Worklist.push(I);
+    }
+
     Instruction *I = Worklist.removeOne();
     if (I == nullptr) continue;  // skip null values.
 
@@ -3552,7 +3568,6 @@ bool InstCombiner::run() {
       }
       MadeIRChange = true;
     }
-    Worklist.addDeferredInstructions();
   }
 
   Worklist.zap();
@@ -3588,16 +3603,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
     for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
       Instruction *Inst = &*BBI++;
 
-      // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst, TLI)) {
-        ++NumDeadInst;
-        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfoOrMarkUndef(*Inst);
-        Inst->eraseFromParent();
-        MadeIRChange = true;
-        continue;
-      }
-
       // ConstantProp instruction if trivially constant.
       if (!Inst->use_empty() &&
           (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
@@ -3665,7 +3670,21 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
   // of the function down.  This jives well with the way that it adds all uses
   // of instructions to the worklist after doing a transformation, thus avoiding
   // some N^2 behavior in pathological cases.
-  ICWorklist.addInitialGroup(InstrsForInstCombineWorklist);
+  ICWorklist.reserve(InstrsForInstCombineWorklist.size());
+  for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+    // DCE instruction if trivially dead. As we iterate in reverse program
+    // order here, we will clean up whole chains of dead instructions.
+    if (isInstructionTriviallyDead(Inst, TLI)) {
+      ++NumDeadInst;
+      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+      salvageDebugInfoOrMarkUndef(*Inst);
+      Inst->eraseFromParent();
+      MadeIRChange = true;
+      continue;
+    }
+
+    ICWorklist.push(Inst);
+  }
 
   return MadeIRChange;
 }

diff  --git a/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index ed32bb1aa9d5..169bf7035a80 100644
--- a/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -instcombine-infinite-loop-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -instcombine -instcombine-infinite-loop-threshold=2 -S < %s | FileCheck %s
 
 ; <rdar://problem/8606771>
 define i32 @main(i32 %argc) {

diff  --git a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
index c378033eef72..8d03a55b5b5a 100644
--- a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
+++ b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@@ -24,7 +24,7 @@ define i1 @positive_easyinvert(i16 %x, i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = icmp slt i16 %x, 0
@@ -38,7 +38,7 @@ define i1 @positive_easyinvert0(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()
@@ -52,7 +52,7 @@ define i1 @positive_easyinvert1(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()

diff  --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index ac14fceb5c3a..e14e2bf964d9 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -535,8 +535,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
 
 define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 ; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[COND:%.*]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[COND:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %bitmask = ashr i32 %cond, 31
@@ -549,8 +549,8 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 
 define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
 ; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
 ;
   %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>

diff  --git a/llvm/test/Transforms/InstCombine/pr44245.ll b/llvm/test/Transforms/InstCombine/pr44245.ll
index 15e4df21dc51..f7eb806ba4c6 100644
--- a/llvm/test/Transforms/InstCombine/pr44245.ll
+++ b/llvm/test/Transforms/InstCombine/pr44245.ll
@@ -159,9 +159,9 @@ define void @test_2(i1 %c) local_unnamed_addr {
 ; CHECK:       cond.true133:
 ; CHECK-NEXT:    br label [[COND_END144:%.*]]
 ; CHECK:       cond.false138:
+; CHECK-NEXT:    store %type_2* undef, %type_2** null, align 536870912
 ; CHECK-NEXT:    br label [[COND_END144]]
 ; CHECK:       cond.end144:
-; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
index 4c59be668626..272d4a47ce68 100644
--- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
+; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s
 
 define i8 @single(i32 %A) {
 ; CHECK-LABEL: @single(

diff  --git a/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
index 6cbb25288fb5..66609f141843 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
@@ -12,7 +12,7 @@
 
 define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
 ; CHECK-NEXT:    ret i8 [[AND]]
 ;
@@ -24,7 +24,7 @@ define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 
 define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
@@ -37,7 +37,7 @@ define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 
 define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -49,7 +49,7 @@ define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 
 define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 0
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
@@ -63,7 +63,7 @@ define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 
 define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -77,7 +77,7 @@ define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 
 define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -91,7 +91,7 @@ define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -103,7 +103,7 @@ define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -115,7 +115,7 @@ define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -144,7 +144,7 @@ define i32 @sub_ashr_and_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_and_i32_extra_use_and(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_and_i32_extra_use_and(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    store i32 [[AND]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[AND]]

diff  --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
index 6289e8f64b6e..937bff62dc7a 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@@ -26,7 +26,7 @@ define i32 @clamp255_i32(i32 %x) {
 
 define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[OR]]
 ;
@@ -38,7 +38,7 @@ define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 
 define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;
@@ -50,7 +50,7 @@ define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 
 define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -62,7 +62,7 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 
 define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
@@ -76,7 +76,7 @@ define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 
 define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -90,7 +90,7 @@ define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 
 define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -104,7 +104,7 @@ define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -116,7 +116,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -128,7 +128,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -157,7 +157,7 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    store i32 [[OR]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[OR]]

diff  --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
index e0e263b7b0bc..39bd40874160 100644
--- a/llvm/test/Transforms/InstCombine/vec_sext.ll
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -4,8 +4,8 @@
 define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp slt <4 x i32> %b, zeroinitializer
@@ -23,8 +23,8 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>

diff  --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index 5ad0e139bc2b..e9a3e608ea29 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -77,9 +77,9 @@ end:
 define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @test_recursive(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[C:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]


        

