[llvm] 15d5c59 - [InstCombine] Improve the analysis using the dominating condition
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 1 01:03:44 PST 2023
Author: Zhongyunde
Date: 2023-03-01T17:03:34+08:00
New Revision: 15d5c59280c9943b23a372ca5fdd8a88ce930514
URL: https://github.com/llvm/llvm-project/commit/15d5c59280c9943b23a372ca5fdd8a88ce930514
DIFF: https://github.com/llvm/llvm-project/commit/15d5c59280c9943b23a372ca5fdd8a88ce930514.diff
LOG: [InstCombine] Improve the analysis using the dominating condition
By taking the dominating condition into account, the urem fold benefits from the improved analysis.
Fix https://github.com/llvm/llvm-project/issues/60546
NOTE: the now-redundant calls in simplifyBinaryIntrinsic and foldICmpWithDominatingICmp
are deleted to reduce compile time.
Reviewed By: nikic, arsenm, erikdesjardins
Differential Revision: https://reviews.llvm.org/D144248
Added:
Modified:
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/test/CodeGen/AMDGPU/infinite-loop.ll
llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
llvm/test/Transforms/InstSimplify/select-implied.ll
llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 18b6aa442db6..d6f3585fcc58 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3955,6 +3955,10 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithDominatingAssume(Pred, LHS, RHS, Q))
return V;
+ if (std::optional<bool> Res =
+ isImpliedByDomCondition(Pred, LHS, RHS, Q.CxtI, Q.DL))
+ return ConstantInt::getBool(ITy, *Res);
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
@@ -6187,13 +6191,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit))
return Op1;
- if (std::optional<bool> Imp =
- isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL))
- return *Imp ? Op0 : Op1;
- if (std::optional<bool> Imp =
- isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL))
- return *Imp ? Op1 : Op0;
-
break;
}
case Intrinsic::usub_with_overflow:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2300455a7934..1616c0e90ecc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1382,11 +1382,8 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
if (TrueBB == FalseBB)
return nullptr;
- // Try to simplify this compare to T/F based on the dominating condition.
- std::optional<bool> Imp =
- isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
- if (Imp)
- return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+ // We already checked simple implication in InstSimplify, only handle complex
+ // cases here.
CmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 3cd821b367ff..9e1e61848320 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -598,7 +598,7 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
; GCN-NEXT: .LBB10_2:
; GCN-NEXT: s_mov_b64 s[8:9], 0
; GCN-NEXT: .LBB10_3: ; %bb9
-; GCN-NEXT: s_cmp_lt_i32 s7, 1
+; GCN-NEXT: s_cmp_lt_i32 s7, 11
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
; GCN-NEXT: s_cmp_ge_i32 s6, s7
; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
@@ -641,8 +641,7 @@ bb:
br i1 %tmp8, label %bb9, label %bb13
bb9: ; preds = %bb
- %tmp7 = icmp sgt i32 %arg4, 0
- %tmp10 = and i1 %tmp7, %tmp
+ %tmp7 = icmp sgt i32 %arg4, 10 ; avoid being optimized away through the domination
%tmp11 = icmp slt i32 %arg3, %arg4
%tmp12 = or i1 %tmp11, %tmp7
br i1 %tmp12, label %bb19, label %bb14
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 3c62b35db9da..50fdf34d8b6b 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -528,7 +528,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: v_mov_b32_e32 v3, 3
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:12
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_cbranch_execz .LBB3_3
@@ -672,19 +672,18 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
-; GCN-O0-NEXT: s_mov_b32 s2, 0
-; GCN-O0-NEXT: s_mov_b32 s4, s2
+; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
+; GCN-O0-NEXT: s_mov_b32 s0, 0
+; GCN-O0-NEXT: s_mov_b32 s2, s0
+; GCN-O0-NEXT: s_mov_b32 s3, s1
+; GCN-O0-NEXT: s_mov_b32 s4, s0
; GCN-O0-NEXT: s_mov_b32 s5, s0
-; GCN-O0-NEXT: s_mov_b32 s0, s2
-; GCN-O0-NEXT: s_mov_b32 s1, s2
-; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
-; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
+; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
+; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 offset:12
-; GCN-O0-NEXT: s_mov_b32 s0, 2
+; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 offset:12
; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 6
@@ -747,7 +746,7 @@ bb.inner.then:
bb.outer.else:
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %tmp1, i32 3
store i32 3, ptr addrspace(1) %tmp4, align 4
- %cc3 = icmp eq i32 %tmp, 2
+ %cc3 = icmp eq i32 %tmp, 0 ; avoid being optimized away through the domination
br i1 %cc3, label %bb.inner.then2, label %bb.outer.end
bb.inner.then2:
diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
index 3be8470a8206..01d281e6572b 100644
--- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -147,7 +147,7 @@ loop2:
define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_nest_ret:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_cbranch_execz .LBB3_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
@@ -180,7 +180,7 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; IR-LABEL: @infinite_loop_nest_ret(
; IR-NEXT: entry:
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
-; IR-NEXT: [[COND1:%.*]] = icmp eq i32 [[TMP]], 1
+; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
; IR-NEXT: br i1 [[COND1]], label [[OUTER_LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR: outer_loop:
; IR-NEXT: br label [[INNER_LOOP:%.*]]
@@ -195,7 +195,7 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
- %cond1 = icmp eq i32 %tmp, 1
+ %cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination
br i1 %cond1, label %outer_loop, label %return
outer_loop:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index b7b8cbf386ed..0873638d335b 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -810,21 +810,19 @@ define amdgpu_kernel void @cse_uniform_condition_
diff erent_blocks(i32 %cond, ptr
; SI-NEXT: s_load_dword s2, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s2, 1
-; SI-NEXT: s_cbranch_scc1 .LBB14_3
+; SI-NEXT: s_cbranch_scc1 .LBB14_2
; SI-NEXT: ; %bb.1: ; %bb2
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v1, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: s_cbranch_scc1 .LBB14_3
-; SI-NEXT: ; %bb.2: ; %bb7
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; SI-NEXT: .LBB14_3: ; %bb9
+; SI-NEXT: .LBB14_2: ; %bb9
; SI-NEXT: s_endpgm
;
; VI-LABEL: cse_uniform_condition_
diff erent_blocks:
@@ -832,21 +830,19 @@ define amdgpu_kernel void @cse_uniform_condition_
diff erent_blocks(i32 %cond, ptr
; VI-NEXT: s_load_dword s2, s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lt_i32 s2, 1
-; VI-NEXT: s_cbranch_scc1 .LBB14_3
+; VI-NEXT: s_cbranch_scc1 .LBB14_2
; VI-NEXT: ; %bb.1: ; %bb2
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v1, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_cbranch_scc1 .LBB14_3
-; VI-NEXT: ; %bb.2: ; %bb7
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
-; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; VI-NEXT: .LBB14_3: ; %bb9
+; VI-NEXT: .LBB14_2: ; %bb9
; VI-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
index 405c8f8ca9af..b5c9b903e184 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
@@ -11,7 +11,7 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r0, #1
+; CHECK-NEXT: cmp r0, #2
; CHECK-NEXT: bls.w .LBB0_12
; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader
; CHECK-NEXT: movw r5, :lower16:arr_183
@@ -189,7 +189,7 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
; CHECK-NEXT: letp lr, .LBB0_24
; CHECK-NEXT: b .LBB0_14
entry:
- %cmp = icmp ugt i8 %b, 1
+ %cmp = icmp ugt i8 %b, 2 ; avoid following BB optimizing away through the domination
br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader
for.cond.preheader: ; preds = %entry
diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
index 147fc64f9c92..9a6c59b91aca 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
@@ -144,7 +144,6 @@ define i32 @ctz3(i32 %x) {
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP0]], [[IF_END]] ], [ 32, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll b/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
index b3304199d6c6..02be67a2ca25 100644
--- a/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/urem-via-cmp-select.ll
@@ -103,12 +103,64 @@ define i8 @urem_without_assume(i8 %arg, i8 %arg2) {
ret i8 %out
}
-; TODO: https://alive2.llvm.org/ce/z/eHkgRa
+; https://alive2.llvm.org/ce/z/eHkgRa
define i8 @urem_with_dominating_condition(i8 %x, i8 %n) {
; CHECK-LABEL: @urem_with_dominating_condition(
-; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X_FR]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[DOTBB0:%.*]], label [[DOTBB1:%.*]]
; CHECK: .bb0:
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X_FR]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8 [[ADD]], [[N]]
+; CHECK-NEXT: [[OUT:%.*]] = select i1 [[TMP0]], i8 0, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[OUT]]
+; CHECK: .bb1:
+; CHECK-NEXT: ret i8 0
+;
+start:
+ %cond = icmp ult i8 %x, %n
+ br i1 %cond, label %.bb0, label %.bb1 ; Should also works for a dominating condition
+.bb0:
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+.bb1:
+ ret i8 0
+}
+
+; Revert the dominating condition and target branch at the same time.
+define i8 @urem_with_dominating_condition_false(i8 %x, i8 %n) {
+; CHECK-LABEL: @urem_with_dominating_condition_false(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]]
+; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ult i8 [[X_FR]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND_NOT]], label [[DOTBB0:%.*]], label [[DOTBB1:%.*]]
+; CHECK: .bb0:
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X_FR]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8 [[ADD]], [[N]]
+; CHECK-NEXT: [[OUT:%.*]] = select i1 [[TMP0]], i8 0, i8 [[ADD]]
+; CHECK-NEXT: ret i8 [[OUT]]
+; CHECK: .bb1:
+; CHECK-NEXT: ret i8 0
+;
+start:
+ %cond = icmp uge i8 %x, %n
+ br i1 %cond, label %.bb1, label %.bb0 ; Swap the branch targets
+.bb0:
+ %add = add i8 %x, 1
+ %out = urem i8 %add, %n
+ ret i8 %out
+.bb1:
+ ret i8 0
+}
+
+; Negative test
+define noundef i8 @urem_with_opposite_condition(i8 %x, i8 %n) {
+; CHECK-LABEL: @urem_with_opposite_condition(
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[DOTBB1:%.*]], label [[DOTBB0:%.*]]
+; CHECK: .bb0:
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], 1
; CHECK-NEXT: [[OUT:%.*]] = urem i8 [[ADD]], [[N]]
; CHECK-NEXT: ret i8 [[OUT]]
@@ -116,7 +168,7 @@ define i8 @urem_with_dominating_condition(i8 %x, i8 %n) {
; CHECK-NEXT: ret i8 0
;
%cond = icmp ult i8 %x, %n
- br i1 %cond, label %.bb0, label %.bb1 ; Should also works for a dominating condition
+ br i1 %cond, label %.bb1, label %.bb0 ; Revert the condition
.bb0:
%add = add i8 %x, 1
%out = urem i8 %add, %n
diff --git a/llvm/test/Transforms/InstSimplify/select-implied.ll b/llvm/test/Transforms/InstSimplify/select-implied.ll
index a420ad17636f..97448833a3ce 100644
--- a/llvm/test/Transforms/InstSimplify/select-implied.ll
+++ b/llvm/test/Transforms/InstSimplify/select-implied.ll
@@ -108,8 +108,7 @@ define void @test4(i32 %len) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[LEN]], 4
; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[B1:%.*]]
; CHECK: bb:
-; CHECK-NEXT: [[CMP11:%.*]] = icmp eq i32 [[LEN]], 8
-; CHECK-NEXT: br i1 [[CMP11]], label [[B0:%.*]], label [[B1]]
+; CHECK-NEXT: br i1 false, label [[B0:%.*]], label [[B1]]
; CHECK: b0:
; CHECK-NEXT: call void @foo(i32 [[LEN]])
; CHECK-NEXT: br label [[B1]]
@@ -399,9 +398,7 @@ define void @doesnt_imply_and(i32 %a, i32 %b, i1 %x) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: br i1 [[CMP1]], label [[END:%.*]], label [[TAKEN:%.*]]
; CHECK: taken:
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = and i1 [[CMP2]], [[X:%.*]]
-; CHECK-NEXT: [[C:%.*]] = select i1 [[OR]], i32 20, i32 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[X:%.*]], i32 20, i32 0
; CHECK-NEXT: call void @foo(i32 [[C]])
; CHECK-NEXT: br label [[END]]
; CHECK: end:
@@ -546,9 +543,7 @@ define void @doesnt_imply_or(i32 %a, i32 %b, i1 %x) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: br i1 [[CMP1]], label [[END:%.*]], label [[TAKEN:%.*]]
; CHECK: taken:
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT: [[AND:%.*]] = or i1 [[CMP2]], [[X:%.*]]
-; CHECK-NEXT: [[C:%.*]] = select i1 [[AND]], i32 20, i32 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[X:%.*]], i32 20, i32 0
; CHECK-NEXT: call void @foo(i32 [[C]])
; CHECK-NEXT: br label [[END]]
; CHECK: end:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index 379e9528c3fa..3f0e2b3a195c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -214,8 +214,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT:%.*]] = add i64 [[INNER_1_IV]], 1
-; CHECK-NEXT: [[CMP_1_1:%.*]] = icmp sgt i32 [[OUTER_P]], 0
-; CHECK-NEXT: br i1 [[CMP_1_1]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_1:%.*]]
+; CHECK-NEXT: br i1 false, label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_1:%.*]]
; CHECK: inner.1.latch.1:
; CHECK-NEXT: [[L_1_1:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_1]], ptr [[DST]], align 8
@@ -226,8 +225,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1_2:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_2]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT_2:%.*]] = add i64 [[INNER_1_IV_NEXT_1]], 1
-; CHECK-NEXT: [[CMP_1_3:%.*]] = icmp sgt i32 [[OUTER_P]], 0
-; CHECK-NEXT: br i1 [[CMP_1_3]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_3:%.*]]
+; CHECK-NEXT: br i1 false, label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_3:%.*]]
; CHECK: inner.1.latch.3:
; CHECK-NEXT: [[L_1_3:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_3]], ptr [[DST]], align 8
@@ -238,8 +236,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1_4:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_4]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT_4:%.*]] = add i64 [[INNER_1_IV_NEXT_3]], 1
-; CHECK-NEXT: [[CMP_1_5:%.*]] = icmp sgt i32 [[OUTER_P]], 0
-; CHECK-NEXT: br i1 [[CMP_1_5]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_5:%.*]]
+; CHECK-NEXT: br i1 false, label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_5:%.*]]
; CHECK: inner.1.latch.5:
; CHECK-NEXT: [[L_1_5:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_5]], ptr [[DST]], align 8
@@ -250,8 +247,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1_6:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_6]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT_6:%.*]] = add i64 [[INNER_1_IV_NEXT_5]], 1
-; CHECK-NEXT: [[CMP_1_7:%.*]] = icmp sgt i32 [[OUTER_P]], 0
-; CHECK-NEXT: br i1 [[CMP_1_7]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_7]]
+; CHECK-NEXT: br i1 false, label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_7]]
; CHECK: inner.1.latch.7:
; CHECK-NEXT: [[L_1_7:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
index 029dd4d97b7e..9ccee9567850 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
@@ -25,8 +25,7 @@ define i32 @test(ptr nocapture %a, i64 %n) {
; ENABLED-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; ENABLED-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
-; ENABLED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]]
+; ENABLED-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]]
; ENABLED: for.body.1:
; ENABLED-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; ENABLED-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
@@ -41,8 +40,7 @@ define i32 @test(ptr nocapture %a, i64 %n) {
; ENABLED-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; ENABLED-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
-; ENABLED-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
+; ENABLED-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
; ENABLED: for.body.3:
; ENABLED-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
; ENABLED-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
@@ -57,8 +55,7 @@ define i32 @test(ptr nocapture %a, i64 %n) {
; ENABLED-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; ENABLED-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
-; ENABLED-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
+; ENABLED-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
; ENABLED: for.body.5:
; ENABLED-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
; ENABLED-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
@@ -73,8 +70,7 @@ define i32 @test(ptr nocapture %a, i64 %n) {
; ENABLED-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; ENABLED-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
-; ENABLED-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
+; ENABLED-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
; ENABLED: for.body.7:
; ENABLED-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
; ENABLED-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
More information about the llvm-commits
mailing list