[llvm] d2e5d35 - [StructurizeCFG] Clean up some boolean not instructions

Jay Foad via llvm-commits llvm-commits@lists.llvm.org
Tue Feb 1 01:37:42 PST 2022


Author: Jay Foad
Date: 2022-02-01T09:35:37Z
New Revision: d2e5d3512be092af91a36d2e7d1884c786127950

URL: https://github.com/llvm/llvm-project/commit/d2e5d3512be092af91a36d2e7d1884c786127950
DIFF: https://github.com/llvm/llvm-project/commit/d2e5d3512be092af91a36d2e7d1884c786127950.diff

LOG: [StructurizeCFG] Clean up some boolean not instructions

In some cases StructurizeCFG inserts i1 xor instructions to invert
predicates. Add a quick loop to clean these up afterwards if we can get
away with modifying an existing compare instruction instead.
(StructurizeCFG is generally run late in the pipeline so instcombine
does not clean them up for us.)

Differential Revision: https://reviews.llvm.org/D118623
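
For illustration, a minimal before/after sketch of the cleanup (hypothetical
value and label names; the rewrite matches what the updated tests below check,
e.g. loop_break.ll):

    ; before: StructurizeCFG inverted a single-use compare with an xor
    %cmp = icmp slt i32 %a, %b
    %inv = xor i1 %cmp, true
    br i1 %inv, label %Flow, label %bb

    ; after simplifyConditions: the compare's predicate is flipped in place
    ; and the dead xor is erased
    %cmp = icmp sge i32 %a, %b
    br i1 %cmp, label %Flow, label %bb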

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
    llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
    llvm/test/CodeGen/AMDGPU/ctpop16.ll
    llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
    llvm/test/CodeGen/AMDGPU/loop_break.ll
    llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
    llvm/test/CodeGen/AMDGPU/multilevel-break.ll
    llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
    llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
    llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
    llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
    llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
    llvm/test/Transforms/StructurizeCFG/bug36015.ll
    llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
    llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
    llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ac580b4161f4b..b3a445368537d 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -276,6 +276,8 @@ class StructurizeCFG {
 
   void insertConditions(bool Loops);
 
+  void simplifyConditions();
+
   void delPhiValues(BasicBlock *From, BasicBlock *To);
 
   void addPhiValues(BasicBlock *From, BasicBlock *To);
@@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
   }
 }
 
+/// Simplify any inverted conditions that were built by buildConditions.
+void StructurizeCFG::simplifyConditions() {
+  SmallVector<Instruction *> InstToErase;
+  for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
+    auto &Preds = I.second;
+    for (auto &J : Preds) {
+      auto &Cond = J.second;
+      Instruction *Inverted;
+      if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
+          !Cond->use_empty()) {
+        if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
+          InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
+          Cond->replaceAllUsesWith(InvertedCmp);
+          InstToErase.push_back(cast<Instruction>(Cond));
+        }
+      }
+    }
+  }
+  for (auto *I : InstToErase)
+    I->eraseFromParent();
+}
+
 /// Remove all PHI values coming from "From" into "To" and remember
 /// them in DeletedPhis
 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
@@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
   createFlow();
   insertConditions(false);
   insertConditions(true);
+  simplifyConditions();
   setPhiValues();
   simplifyAffectedPhis();
   rebuildSSA();
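
Two implementation notes on the new simplifyConditions loop above: the
m_OneUse guard ensures the matched compare's only user is the xor itself, so
flipping its predicate cannot affect any other code; and the dead xors are
collected in InstToErase and only erased after the walk completes, presumably
so that values still referenced by the Predicates/LoopPreds maps are not
deleted mid-iteration.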

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 3e8d1ec485b5e..746f2591db7d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -139,8 +139,10 @@ define void @constrained_if_register_class() {
 ; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
-; CHECK-NEXT:    s_cbranch_scc1 .LBB4_4
-; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    s_cbranch_scc0 .LBB4_2
+; CHECK-NEXT:  .LBB4_1: ; %bb12
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:  .LBB4_2: ; %bb2
 ; CHECK-NEXT:    s_getpc_b64 s[4:5]
 ; CHECK-NEXT:    s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
 ; CHECK-NEXT:    s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
@@ -153,15 +155,13 @@ define void @constrained_if_register_class() {
 ; CHECK-NEXT:    s_mov_b32 s4, -1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
-; CHECK-NEXT:    s_cbranch_vccnz .LBB4_3
-; CHECK-NEXT:  ; %bb.2: ; %bb7
+; CHECK-NEXT:    s_cbranch_vccnz .LBB4_4
+; CHECK-NEXT:  ; %bb.3: ; %bb7
 ; CHECK-NEXT:    s_mov_b32 s4, 0
-; CHECK-NEXT:  .LBB4_3: ; %bb8
+; CHECK-NEXT:  .LBB4_4: ; %bb8
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
-; CHECK-NEXT:    s_cbranch_scc0 .LBB4_5
-; CHECK-NEXT:  .LBB4_4: ; %bb12
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-; CHECK-NEXT:  .LBB4_5: ; %bb11
+; CHECK-NEXT:    s_cbranch_scc1 .LBB4_1
+; CHECK-NEXT:  ; %bb.5: ; %bb11
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
 ; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 73416dbb3096e..0b1105fba0eba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -838,7 +838,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
-; CGP-NEXT:  .LBB2_2: ; %Flow2
+; CGP-NEXT:  .LBB2_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
@@ -3118,7 +3118,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:  .LBB8_2: ; %Flow2
+; CGP-NEXT:  .LBB8_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 5e60c7ca2415a..1e95103fd61cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -824,7 +824,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v2, v4, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
-; CGP-NEXT:  .LBB2_2: ; %Flow2
+; CGP-NEXT:  .LBB2_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
@@ -3072,7 +3072,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v2, v4, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:  .LBB8_2: ; %Flow2
+; CGP-NEXT:  .LBB8_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index bf3c080cafaea..2ba189ce7b965 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -759,7 +759,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
-; CGP-NEXT:  .LBB2_2: ; %Flow2
+; CGP-NEXT:  .LBB2_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
@@ -1641,7 +1641,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:  .LBB8_2: ; %Flow2
+; CGP-NEXT:  .LBB8_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 97806c56204de..e6163221c6991 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -750,7 +750,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
-; CGP-NEXT:  .LBB2_2: ; %Flow2
+; CGP-NEXT:  .LBB2_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
@@ -2181,7 +2181,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:  .LBB8_2: ; %Flow2
+; CGP-NEXT:  .LBB8_2: ; %Flow1
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]

diff  --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index c8c8911b733d1..c1cb51e133c2c 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -227,30 +227,31 @@ bb3:
 
 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
 ; GCN: s_cmp_eq_u32
-; GCN: s_cbranch_scc{{[0-1]}} [[BB2:.LBB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]]
 
 ; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
 ; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
-; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
-; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
+; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32
 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
 
-; GCN: [[BB2]]: ; %bb3
-; GCN: v_nop_e64
-; GCN: v_nop_e64
-; GCN: v_nop_e64
-; GCN: v_nop_e64
-; GCN: ;;#ASMEND
-
-; GCN: [[BB3]]:
+; GCN: [[BB1]]:
 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
 ; GCN: buffer_store_dword [[BB2_K]]
 
 ; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
 ; GCN: buffer_store_dword [[BB4_K]]
 ; GCN: s_endpgm
-; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
+
+; GCN: [[BB4]]: ; %bb3
+; GCN: v_nop_e64
+; GCN: v_nop_e64
+; GCN: v_nop_e64
+; GCN: v_nop_e64
+; GCN: ;;#ASMEND
+
+; GCN: .Lfunc_end{{[0-9]+}}:
 define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
 bb0:
   %tmp = icmp ne i32 %arg1, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 308bc497f9d54..e4a1177f85203 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1502,7 +1502,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_lshr_b32 s5, s4, 16
 ; SI-NEXT:    s_cmp_lg_u32 s5, 0
-; SI-NEXT:    s_cbranch_scc0 .LBB14_2
+; SI-NEXT:    s_cbranch_scc0 .LBB14_4
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    s_mov_b32 s11, 0xf000
 ; SI-NEXT:    s_mov_b32 s10, -1
@@ -1510,22 +1510,22 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; SI-NEXT:    s_mov_b32 s9, s3
 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 offset:2
 ; SI-NEXT:    s_mov_b64 s[2:3], 0
-; SI-NEXT:    s_cbranch_execz .LBB14_3
-; SI-NEXT:    s_branch .LBB14_4
-; SI-NEXT:  .LBB14_2:
-; SI-NEXT:    s_mov_b64 s[2:3], -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0
-; SI-NEXT:  .LBB14_3: ; %if
+; SI-NEXT:    s_cbranch_execnz .LBB14_3
+; SI-NEXT:  .LBB14_2: ; %if
 ; SI-NEXT:    s_and_b32 s2, s4, 0xffff
 ; SI-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v0, s2
-; SI-NEXT:  .LBB14_4: ; %endif
+; SI-NEXT:  .LBB14_3: ; %endif
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
+; SI-NEXT:  .LBB14_4:
+; SI-NEXT:    s_mov_b64 s[2:3], -1
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    s_branch .LBB14_2
 ;
 ; VI-LABEL: ctpop_i16_in_br:
 ; VI:       ; %bb.0: ; %entry
@@ -1535,7 +1535,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; VI-NEXT:    s_lshr_b32 s5, s4, 16
 ; VI-NEXT:    v_cmp_ne_u16_e64 s[6:7], s5, 0
 ; VI-NEXT:    s_and_b64 vcc, exec, s[6:7]
-; VI-NEXT:    s_cbranch_vccz .LBB14_2
+; VI-NEXT:    s_cbranch_vccz .LBB14_4
 ; VI-NEXT:  ; %bb.1: ; %else
 ; VI-NEXT:    s_mov_b32 s11, 0xf000
 ; VI-NEXT:    s_mov_b32 s10, -1
@@ -1543,22 +1543,22 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; VI-NEXT:    s_mov_b32 s9, s3
 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 offset:2
 ; VI-NEXT:    s_mov_b64 s[2:3], 0
-; VI-NEXT:    s_cbranch_execz .LBB14_3
-; VI-NEXT:    s_branch .LBB14_4
-; VI-NEXT:  .LBB14_2:
-; VI-NEXT:    s_mov_b64 s[2:3], -1
-; VI-NEXT:    ; implicit-def: $vgpr0
-; VI-NEXT:  .LBB14_3: ; %if
+; VI-NEXT:    s_cbranch_execnz .LBB14_3
+; VI-NEXT:  .LBB14_2: ; %if
 ; VI-NEXT:    s_and_b32 s2, s4, 0xffff
 ; VI-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:  .LBB14_4: ; %endif
+; VI-NEXT:  .LBB14_3: ; %endif
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+; VI-NEXT:  .LBB14_4:
+; VI-NEXT:    s_mov_b64 s[2:3], -1
+; VI-NEXT:    ; implicit-def: $vgpr0
+; VI-NEXT:    s_branch .LBB14_2
 ;
 ; EG-LABEL: ctpop_i16_in_br:
 ; EG:       ; %bb.0: ; %entry

diff  --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 89d319d24bd0a..4de859f473dfc 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -1534,19 +1534,17 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_cmp_lg_u32 s6, 0
-; SI-NEXT:    s_cbranch_scc0 .LBB30_2
+; SI-NEXT:    s_cbranch_scc0 .LBB30_4
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    s_load_dword s7, s[2:3], 0x1
 ; SI-NEXT:    s_mov_b64 s[4:5], 0
 ; SI-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 vcc, vcc
-; SI-NEXT:    s_cbranch_vccz .LBB30_3
-; SI-NEXT:    s_branch .LBB30_4
-; SI-NEXT:  .LBB30_2:
-; SI-NEXT:  .LBB30_3: ; %if
+; SI-NEXT:    s_cbranch_vccnz .LBB30_3
+; SI-NEXT:  .LBB30_2: ; %if
 ; SI-NEXT:    s_load_dword s7, s[2:3], 0x0
-; SI-NEXT:  .LBB30_4: ; %endif
+; SI-NEXT:  .LBB30_3: ; %endif
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v0, s6
 ; SI-NEXT:    s_mov_b32 s3, 0x100f000
@@ -1554,6 +1552,8 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; SI-NEXT:    v_mov_b32_e32 v1, s7
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
+; SI-NEXT:  .LBB30_4:
+; SI-NEXT:    s_branch .LBB30_2
 ;
 ; VI-LABEL: insert_split_bb:
 ; VI:       ; %bb.0: ; %entry
@@ -1561,16 +1561,14 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_cmp_lg_u32 s6, 0
-; VI-NEXT:    s_cbranch_scc0 .LBB30_2
+; VI-NEXT:    s_cbranch_scc0 .LBB30_4
 ; VI-NEXT:  ; %bb.1: ; %else
 ; VI-NEXT:    s_load_dword s7, s[2:3], 0x4
-; VI-NEXT:    s_cbranch_execz .LBB30_3
-; VI-NEXT:    s_branch .LBB30_4
-; VI-NEXT:  .LBB30_2:
-; VI-NEXT:  .LBB30_3: ; %if
+; VI-NEXT:    s_cbranch_execnz .LBB30_3
+; VI-NEXT:  .LBB30_2: ; %if
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_load_dword s7, s[2:3], 0x0
-; VI-NEXT:  .LBB30_4: ; %endif
+; VI-NEXT:  .LBB30_3: ; %endif
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v0, s6
 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000
@@ -1578,6 +1576,8 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; VI-NEXT:    v_mov_b32_e32 v1, s7
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
+; VI-NEXT:  .LBB30_4:
+; VI-NEXT:    s_branch .LBB30_2
 entry:
   %0 = insertelement <2 x i32> undef, i32 %a, i32 0
   %1 = icmp eq i32 %a, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index ea6493c6f79e5..8cccb5e65806e 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -17,11 +17,10 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
 ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 ; OPT:       bb4:
 ; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
-; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
+; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
 ; OPT-NEXT:    br label [[FLOW]]
 ; OPT:       Flow:
-; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
+; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
 ; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
 ; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
 ; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]

diff  --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index ff227129745a7..64505bcf7badc 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -9,14 +9,14 @@
 ; StructurizeCFG.
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
 ; IR: %1 = extractvalue { i1, i64 } %0, 0
 ; IR: %2 = extractvalue { i1, i64 } %0, 1
 ; IR: br i1 %1, label %LeafBlock1, label %Flow
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 ; IR: %6 = extractvalue { i1, i64 } %5, 0
 ; IR: %7 = extractvalue { i1, i64 } %5, 1
@@ -75,14 +75,13 @@
 ; GCN-NEXT: s_or_saveexec_b64
 ; GCN-NEXT: s_xor_b64
 
-; FIXME: Why is this compare essentially repeated?
 ; GCN: ; %LeafBlock
 ; GCN-DAG:  v_cmp_eq_u32_e32    vcc, 1,
-; GCN-DAG:  v_cmp_ne_u32_e64    [[TMP1:s\[[0-9]+:[0-9]+\]]], 1,
+; GCN-DAG:  v_cmp_ne_u32_e64    [[INV:s\[[0-9]+:[0-9]+\]]], 1,
 ; GCN-DAG:  s_andn2_b64         [[EXIT0]], [[EXIT0]], exec
 ; GCN-DAG:  s_andn2_b64         [[EXIT1]], [[EXIT1]], exec
 ; GCN-DAG:  s_and_b64           [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec
-; GCN-DAG:  s_and_b64           [[TMP1]], [[TMP1]], exec
+; GCN-DAG:  s_and_b64           [[TMP1:s\[[0-9]+:[0-9]+\]]], [[INV]], exec
 ; GCN-DAG:  s_or_b64            [[EXIT0]], [[EXIT0]], [[TMP0]]
 ; GCN-DAG:  s_or_b64            [[EXIT1]], [[EXIT1]], [[TMP1]]
 
@@ -141,7 +140,7 @@ exit1:                                     ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
 
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
@@ -196,24 +195,22 @@ exit1:                                     ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
-; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
+; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2
 ; IR: llvm.amdgcn.if
 ; IR: br i1
 
 ; IR: {{^}}Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %uniform.cond0.inv, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 ; IR: br i1 %6, label %LeafBlock, label %Flow1
 
 ; IR: {{^}}LeafBlock:
 ; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
-; IR: %divergent.cond1.inv = xor i1 %divergent.cond1, true
 ; IR: br label %Flow1
 
 ; IR: LeafBlock1:
-; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
-; IR: %uniform.cond0.inv = xor i1 %uniform.cond0, true
+; IR: %uniform.cond0 = icmp ne i32 %arg3, 2
 ; IR: br label %Flow
 
 ; IR: Flow2:
@@ -279,12 +276,12 @@ exit1:                                     ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
 ; IR: br i1 %1, label %LeafBlock1, label %Flow
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
 ; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
@@ -401,11 +398,11 @@ exit1:                                     ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
 ; IR: Flow2:
@@ -640,7 +637,7 @@ uniform.ret:
 ; IR: br i1 %6, label %uniform.if, label %Flow2
 
 ; IR: Flow:                                             ; preds = %uniform.then, %uniform.if
-; IR: %7 = phi i1 [ %uniform.cond2.inv, %uniform.then ], [ %uniform.cond1.inv, %uniform.if ]
+; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1.inv, %uniform.if ]
 ; IR: br i1 %7, label %uniform.endif, label %uniform.ret0
 
 ; IR: UnifiedReturnBlock:                               ; preds = %Flow3, %Flow2

diff  --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index ee90406595247..5ec757a2240f9 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -123,14 +123,13 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT:    [[LOAD0:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
 ; OPT-NEXT:    br label [[NODEBLOCK:%.*]]
 ; OPT:       NodeBlock:
-; OPT-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[LOAD0]], 1
-; OPT-NEXT:    [[PIVOT_INV:%.*]] = xor i1 [[PIVOT]], true
-; OPT-NEXT:    br i1 [[PIVOT_INV]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
+; OPT-NEXT:    [[PIVOT:%.*]] = icmp sge i32 [[LOAD0]], 1
+; OPT-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
 ; OPT:       LeafBlock1:
 ; OPT-NEXT:    [[SWITCHLEAF2:%.*]] = icmp eq i32 [[LOAD0]], 1
 ; OPT-NEXT:    br i1 [[SWITCHLEAF2]], label [[CASE1:%.*]], label [[FLOW3:%.*]]
 ; OPT:       Flow3:
-; OPT-NEXT:    [[TMP0:%.*]] = phi i1 [ [[CMP2_INV:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
+; OPT-NEXT:    [[TMP0:%.*]] = phi i1 [ [[CMP2:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ false, [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
 ; OPT-NEXT:    br label [[FLOW]]
 ; OPT:       LeafBlock:
@@ -144,8 +143,7 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT:    br i1 [[TMP5]], label [[FLOW6:%.*]], label [[BB1]]
 ; OPT:       case0:
 ; OPT-NEXT:    [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP]], [[LOAD1]]
-; OPT-NEXT:    [[CMP1_INV:%.*]] = xor i1 [[CMP1]], true
+; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[TMP]], [[LOAD1]]
 ; OPT-NEXT:    br label [[FLOW5]]
 ; OPT:       Flow:
 ; OPT-NEXT:    [[TMP6]] = phi i1 [ [[TMP0]], [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
@@ -154,11 +152,10 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT:    br i1 [[TMP8]], label [[LEAFBLOCK:%.*]], label [[FLOW4]]
 ; OPT:       case1:
 ; OPT-NEXT:    [[LOAD2:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP]], [[LOAD2]]
-; OPT-NEXT:    [[CMP2_INV]] = xor i1 [[CMP2]], true
+; OPT-NEXT:    [[CMP2]] = icmp sge i32 [[TMP]], [[LOAD2]]
 ; OPT-NEXT:    br label [[FLOW3]]
 ; OPT:       Flow5:
-; OPT-NEXT:    [[TMP9]] = phi i1 [ [[CMP1_INV]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
+; OPT-NEXT:    [[TMP9]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
 ; OPT-NEXT:    [[TMP10]] = phi i1 [ false, [[CASE0]] ], [ true, [[LEAFBLOCK]] ]
 ; OPT-NEXT:    br label [[FLOW4]]
 ; OPT:       Flow6:
@@ -196,8 +193,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
 ; GCN-NEXT:    s_mov_b64 s[6:7], -1
+; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
 ; GCN-NEXT:    s_and_b64 vcc, exec, vcc
 ; GCN-NEXT:    ; implicit-def: $sgpr8_sgpr9
 ; GCN-NEXT:    s_mov_b64 s[10:11], -1

diff  --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index a1fa2ab9c690e..113e346ab616f 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -236,8 +236,8 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; IR:       Flow1:
 ; IR-NEXT:    [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ]
 ; IR-NEXT:    [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ]
-; IR-NEXT:    [[TMP13:%.*]] = phi i1 [ [[TMP18:%.*]], [[BB21]] ], [ true, [[BB14]] ]
-; IR-NEXT:    [[TMP14]] = phi i1 [ [[TMP18]], [[BB21]] ], [ false, [[BB14]] ]
+; IR-NEXT:    [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ]
+; IR-NEXT:    [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ]
 ; IR-NEXT:    [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ]
 ; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
 ; IR-NEXT:    [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]])
@@ -262,8 +262,7 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; IR-NEXT:    [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16
 ; IR-NEXT:    [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
 ; IR-NEXT:    [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef
-; IR-NEXT:    [[MY_TMP12:%.*]] = icmp slt i32 [[MY_TMP11]], 9
-; IR-NEXT:    [[TMP18]] = xor i1 [[MY_TMP12]], true
+; IR-NEXT:    [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
 ; IR-NEXT:    br label [[FLOW1]]
 ; IR:       Flow2:
 ; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])

diff  --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
index f12bed6cc554a..b4eb682d5c082 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
@@ -36,19 +36,17 @@ bb4:
 
 ; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
 ; GCN:   s_cmp_lg_u32
-; GCN: s_cselect_b64  [[CC1:[^,]+]], -1, 0
+; GCN:   s_cselect_b64  [[CC1:[^,]+]], -1, 0
 ; GCN:   s_branch [[BB1:.LBB[0-9]+_[0-9]+]]
 ; GCN: [[BB0:.LBB[0-9]+_[0-9]+]]
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN: [[BB1]]:
-; GCN:   s_mov_b64 [[CC2:[^,]+]], -1
 ; GCN:   s_mov_b64 vcc, [[CC1]]
 ; GCN:   s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]]
-; GCN:   s_mov_b64 [[CC2]], 0
+; GCN:   s_mov_b64 vcc, exec
+; GCN:   s_cbranch_execnz [[BB0]]
 ; GCN: [[BB2]]:
-; GCN:   s_andn2_b64 vcc, exec, [[CC2]]
-; GCN:   s_cbranch_vccnz [[BB0]]
 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb2

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index 6a2b7cac64fb2..15384e670cbb5 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -16,22 +16,22 @@ define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a,
 ; SI-NEXT:    s_load_dword s0, s[0:1], 0xf
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_cmp_lg_u32 s8, 0
-; SI-NEXT:    s_cbranch_scc0 .LBB0_2
+; SI-NEXT:    s_cbranch_scc0 .LBB0_4
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    s_add_i32 s2, s11, s0
-; SI-NEXT:    s_cbranch_execz .LBB0_3
-; SI-NEXT:    s_branch .LBB0_4
-; SI-NEXT:  .LBB0_2:
-; SI-NEXT:    ; implicit-def: $sgpr2
-; SI-NEXT:  .LBB0_3: ; %if
+; SI-NEXT:    s_cbranch_execnz .LBB0_3
+; SI-NEXT:  .LBB0_2: ; %if
 ; SI-NEXT:    s_sub_i32 s2, s9, s10
-; SI-NEXT:  .LBB0_4: ; %endif
+; SI-NEXT:  .LBB0_3: ; %endif
 ; SI-NEXT:    s_add_i32 s0, s2, s8
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
+; SI-NEXT:  .LBB0_4:
+; SI-NEXT:    ; implicit-def: $sgpr2
+; SI-NEXT:    s_branch .LBB0_2
 
 entry:
   %0 = icmp eq i32 %a, 0
@@ -59,28 +59,28 @@ define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_cmp_lg_u32 s6, 0
-; SI-NEXT:    s_cbranch_scc0 .LBB1_2
+; SI-NEXT:    s_cbranch_scc0 .LBB1_4
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    s_load_dword s2, s[0:1], 0x2e
 ; SI-NEXT:    s_load_dword s3, s[0:1], 0x37
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_add_i32 s7, s2, s3
-; SI-NEXT:    s_cbranch_execz .LBB1_3
-; SI-NEXT:    s_branch .LBB1_4
-; SI-NEXT:  .LBB1_2:
-; SI-NEXT:    ; implicit-def: $sgpr7
-; SI-NEXT:  .LBB1_3: ; %if
+; SI-NEXT:    s_cbranch_execnz .LBB1_3
+; SI-NEXT:  .LBB1_2: ; %if
 ; SI-NEXT:    s_load_dword s2, s[0:1], 0x1c
 ; SI-NEXT:    s_load_dword s0, s[0:1], 0x25
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_add_i32 s7, s2, s0
-; SI-NEXT:  .LBB1_4: ; %endif
+; SI-NEXT:  .LBB1_3: ; %endif
 ; SI-NEXT:    s_add_i32 s0, s7, s6
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
+; SI-NEXT:  .LBB1_4:
+; SI-NEXT:    ; implicit-def: $sgpr7
+; SI-NEXT:    s_branch .LBB1_2
 
 entry:
   %cmp0 = icmp eq i32 %a, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index 874326d400ec9..ef2acafa66d9a 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -213,7 +213,7 @@ ENDIF:                                            ; preds = %LOOP
 ; CHECK-LABEL: {{^}}sample_v3:
 ; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
 ; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
-; CHECK: s_branch
+; CHECK: s_cbranch
 
 ; CHECK: BB{{[0-9]+_[0-9]+}}:
 ; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
@@ -315,13 +315,15 @@ ENDIF69:                                          ; preds = %LOOP68
 ; CHECK-LABEL:{{^}}sample_rsrc
 
 ; CHECK: s_cmp_eq_u32
-; CHECK: s_cbranch_scc0 [[END:.LBB[0-9]+_[0-9]+]]
+; CHECK: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]]
 
-; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+; CHECK: s_endpgm
 
 ; [[END]]:
+; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
-; CHECK: s_endpgm
+; CHECK: s_branch
 define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <4 x i32>] addrspace(4)* inreg %arg2, [32 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:
   %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0

diff  --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
index 42462b7201a2a..1a2ae7088091d 100644
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
@@ -22,26 +22,25 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-LABEL: @loop_subregion_misordered(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
-; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef, align 8
 ; CHECK-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
 ; CHECK-NEXT:    [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       LOOP.HEADER:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[FLOW3:%.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ]
 ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x100b
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
 ; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 65535
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TMP17]], true
-; CHECK-NEXT:    br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 1
+; CHECK-NEXT:    br i1 [[TMP17]], label [[BB62:%.*]], label [[FLOW:%.*]]
 ; CHECK:       Flow1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
 ; CHECK-NEXT:    br label [[FLOW]]
 ; CHECK:       bb18:
 ; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
@@ -50,9 +49,9 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
 ; CHECK-NEXT:    br label [[INNER_LOOP:%.*]]
 ; CHECK:       Flow2:
-; CHECK-NEXT:    [[TMP4]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP8:%.*]], [[FLOW]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10:%.*]], [[FLOW]] ]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
+; CHECK-NEXT:    [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
 ; CHECK:       INNER_LOOP:
 ; CHECK-NEXT:    [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
 ; CHECK-NEXT:    call void asm sideeffect "
@@ -61,33 +60,32 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-NEXT:    br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
 ; CHECK:       INNER_LOOP_BREAK:
 ; CHECK-NEXT:    [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
-; CHECK-NEXT:    call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+; CHECK-NEXT:    call void asm sideeffect "s_nop 23 ", "~{memory}"() #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    br label [[FLOW2:%.*]]
 ; CHECK:       bb62:
-; CHECK-NEXT:    [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
-; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[LOAD13]], true
-; CHECK-NEXT:    br i1 [[TMP6]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
+; CHECK-NEXT:    [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271
+; CHECK-NEXT:    br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
 ; CHECK:       Flow3:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
 ; CHECK:       Flow4:
-; CHECK-NEXT:    br i1 [[TMP9:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
 ; CHECK:       bb64:
-; CHECK-NEXT:    call void asm sideeffect "s_nop 42", "~{memory}"() #0
+; CHECK-NEXT:    call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]]
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       Flow:
-; CHECK-NEXT:    [[TMP8]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP10]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    br i1 [[TMP11]], label [[BB18]], label [[FLOW2]]
+; CHECK-NEXT:    [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[BB18]], label [[FLOW2]]
 ; CHECK:       INCREMENT_I:
 ; CHECK-NEXT:    [[INC_I]] = add i32 [[I]], 1
 ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1336
 ; CHECK-NEXT:    br label [[FLOW1]]
 ; CHECK:       END_ELSE_BLOCK:
 ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1337
-; CHECK-NEXT:    [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP4]], -1
+; CHECK-NEXT:    [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP3]], -1
 ; CHECK-NEXT:    br label [[FLOW3]]
 ; CHECK:       RETURN:
 ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x99

diff  --git a/llvm/test/Transforms/StructurizeCFG/bug36015.ll b/llvm/test/Transforms/StructurizeCFG/bug36015.ll
index 507b9ae58504c..ea0cf0503b82c 100644
--- a/llvm/test/Transforms/StructurizeCFG/bug36015.ll
+++ b/llvm/test/Transforms/StructurizeCFG/bug36015.ll
@@ -18,7 +18,7 @@ loop.inner:
   br i1 %cond.inner, label %if, label %else
 
 ; CHECK: if:
-; CHECK:   %cond.if.inv = xor i1 %cond.if, true
+; CHECK:   %cond.if = icmp sge i32 %ctr.if, %count
 ; CHECK:   br label %Flow
 if:
   %ctr.if = add i32 %ctr.loop.inner, 1
@@ -27,7 +27,7 @@ if:
   br i1 %cond.if, label %loop.inner, label %exit
 
 ; CHECK: Flow:
-; CHECK:   %1 = phi i1 [ %cond.if.inv, %if ], [ true, %loop.inner ]
+; CHECK:   %1 = phi i1 [ %cond.if, %if ], [ true, %loop.inner ]
 ; CHECK:   %2 = phi i1 [ false, %if ], [ true, %loop.inner ]
 ; CHECK:   br i1 %1, label %Flow1, label %loop.inner
 
@@ -35,6 +35,7 @@ if:
 ; CHECK:   br i1 %2, label %else, label %Flow2
 
 ; CHECK: else:
+; CHECK:   %cond.else = icmp sge i32 %ctr.else, %count
 ; CHECK:   br label %Flow2
 else:
   %ctr.else = add i32 %ctr.loop.inner, 1
@@ -43,7 +44,7 @@ else:
   br i1 %cond.else, label %loop.outer, label %exit
 
 ; CHECK: Flow2:
-; CHECK:   %4 = phi i1 [ %cond.else.inv, %else ], [ true, %Flow1 ]
+; CHECK:   %4 = phi i1 [ %cond.else, %else ], [ true, %Flow1 ]
 ; CHECK:   br i1 %4, label %exit, label %loop.outer
 
 exit:

diff  --git a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
index 61482bb73ad05..880cbfeb17415 100644
--- a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
+++ b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
@@ -6,8 +6,7 @@
 define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
 ; CHECK-LABEL: @invert_constantexpr_condition(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TMP]], true
+; CHECK-NEXT:    [[TMP:%.*]] = icmp ne i32 [[ARG:%.*]], 0
 ; CHECK-NEXT:    br i1 icmp eq (i32 ptrtoint (i32* @g to i32), i32 0), label [[BB2:%.*]], label [[FLOW:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[FLOW]]
@@ -16,8 +15,8 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0)
 ; CHECK-NEXT:    br label [[BB8:%.*]]
 ; CHECK:       Flow:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ]
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[TMP]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB6]], label [[BB3:%.*]]
 ; CHECK:       bb6:
 ; CHECK-NEXT:    [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]]
 ; CHECK-NEXT:    br label [[BB3]]

diff  --git a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
index d21742fb4e8aa..6decec8e70ce4 100644
--- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
+++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
@@ -16,14 +16,14 @@ bb3:                                              ; preds = %bb7, %bb
   br i1 %tmp4, label %bb7, label %bb5
 
 ; CHECK: bb5:
-; CHECK:   %tmp6.inv = xor i1 %tmp6, true
+; CHECK:   %tmp6 = fcmp uge float 0.000000e+00, %arg2
 ; CHECK:   br label %Flow
 bb5:                                              ; preds = %bb3
   %tmp6 = fcmp olt float 0.000000e+00, %arg2
   br i1 %tmp6, label %bb10, label %bb7
 
 ; CHECK: Flow:
-; CHECK:   %0 = phi i1 [ %tmp6.inv, %bb5 ], [ %tmp4, %bb3 ]
+; CHECK:   %0 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ]
 ; CHECK:   br i1 %0, label %bb7, label %Flow1
 
 ; CHECK: bb7:
@@ -34,7 +34,7 @@ bb7:                                              ; preds = %bb5, %bb3
   br i1 %tmp9, label %bb3, label %bb10
 
 ; CHECK: Flow1:
-; CHECK:   %3 = phi i1 [ %tmp9.inv, %bb7 ], [ true, %Flow ]
+; CHECK:   %3 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ]
 ; CHECK:   br i1 %3, label %bb10, label %bb3
 
 ; CHECK: bb10:
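
An aside on the floating-point case in the test above: the inverse of olt is
uge, not oge, because inverting a compare must also flip its behavior on NaN
inputs, so that xor (fcmp olt ...), true and fcmp uge ... agree for all
operands. A minimal sketch with hypothetical value names:

    ; before: inverted with an xor
    %c   = fcmp olt float %x, %y
    %inv = xor i1 %c, true

    ; after: predicate flipped in place, NaN (unordered) behavior flipped too
    %c = fcmp uge float %x, %y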

diff  --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
index 291e9a58e09bd..12b18f0ecd06c 100644
--- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
+++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
@@ -59,7 +59,8 @@ for.end:                                          ; preds = %for.body.1, %if.the
 ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
 
 ; CHECK: for.body.1:
-; CHECK: br i1 %cmp1.5.inv, label %for.body.6, label %Flow3
+; CHECK: %cmp1.5 = icmp ne i32 %tmp22, %K1
+; CHECK: br i1 %cmp1.5, label %for.body.6, label %Flow3
 for.body.1:                                       ; preds = %if.then, %lor.lhs.false
   %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
   %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]

