[llvm] [AMDGPU][GlobalISel] Add register bank legalize rules for amdgcn_icmp, amdgcn_fcmp and amdgcn_ballot. (PR #172017)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 6 03:54:22 PST 2026


https://github.com/anjenner updated https://github.com/llvm/llvm-project/pull/172017

>From 4050d7d4e052fbf59d211f960a1379c371e26e47 Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 08:53:17 -0500
Subject: [PATCH 1/4] AMDGPU: Add register bank legalize rules for amdgcn_icmp,
 amdgcn_fcmp and amdgcn_ballot.

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   30 +
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll  | 1183 ++++++++++++++---
 2 files changed, 1053 insertions(+), 160 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..05ba285bd1379 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1018,4 +1018,34 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       // readfirstlaning just in case register is not in sgpr.
       .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
 
+  addRulesForIOpcs({amdgcn_icmp})
+    .Any({{UniS64, _, S1},
+          {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+    .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+    .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+    .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+    .Any({{UniS32, _, S1},
+          {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+    .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+    .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+    .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+  addRulesForIOpcs({amdgcn_fcmp})
+    .Any({{UniS64, _, S1},
+          {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+    .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+    .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+    .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+    .Any({{UniS32, _, S1},
+          {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+    .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+    .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+    .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+  addRulesForIOpcs({amdgcn_ballot}, Standard)
+    .Uni(S64, {{Sgpr64}, {None, Vcc}})
+    .Uni(S32, {{Sgpr32}, {None, Vcc}});
+
 } // end initialize rules
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index c1f3a12dba578..3b93164539df7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,6 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
 
 declare i32 @llvm.amdgcn.ballot.i32(i1)
 declare i32 @llvm.ctpop.i32(i32)
@@ -8,6 +10,10 @@ declare i32 @llvm.ctpop.i32(i32)
 ; Test ballot(0)
 
 define amdgpu_cs i32 @constant_false() {
+; CHECK-BOTH-LABEL: constant_false:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 0
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: constant_false:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_mov_b32 s0, 0
@@ -19,10 +25,30 @@ define amdgpu_cs i32 @constant_false() {
 ; Test ballot(1)
 
 define amdgpu_cs i32 @constant_true() {
-; CHECK-LABEL: constant_true:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, exec_lo
-; CHECK-NEXT:    ; return to shader part epilog
+; CHECK-SDAG-LABEL: constant_true:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: constant_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_mov_b32 s0, exec_lo
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: constant_true:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: constant_true:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_and_b32 s0, exec_lo, exec_lo
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: constant_true:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
   ret i32 %ballot
 }
@@ -30,6 +56,11 @@ define amdgpu_cs i32 @constant_true() {
 ; Test ballot of a non-comparison operation
 
 define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-BOTH-LABEL: non_compare:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-BOTH-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: non_compare:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -43,6 +74,10 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
 ; Test ballot of comparisons
 
 define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-BOTH-LABEL: compare_ints:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_ints:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
@@ -53,16 +88,40 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
 }
 
 define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-LABEL: compare_int_with_constant:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-NEXT:    ; return to shader part epilog
+; CHECK-SDAG-LABEL: compare_int_with_constant:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: compare_int_with_constant:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: compare_int_with_constant:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_int_with_constant:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_int_with_constant:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
 }
 
 define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-BOTH-LABEL: compare_floats:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_floats:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
@@ -73,6 +132,11 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-BOTH-LABEL: ctpop_of_ballot:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; CHECK-BOTH-NEXT:    s_bcnt1_i32_b32 s0, vcc_lo
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: ctpop_of_ballot:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -85,18 +149,71 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB7_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB7_3
-; CHECK-NEXT:  .LBB7_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB7_3
-; CHECK-NEXT:  .LBB7_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB7_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB7_3
+; CHECK-SDAG-NEXT:  .LBB7_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB7_3
+; CHECK-SDAG-NEXT:  .LBB7_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB7_3
+; GFX10-NEXT:  .LBB7_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB7_3
+; GFX10-NEXT:  .LBB7_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB7_3
+; GFX11-NEXT:  .LBB7_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB7_3
+; GFX11-NEXT:  .LBB7_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB7_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB7_3
+; GFX10-GISEL-NEXT:  .LBB7_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB7_3
+; GFX10-GISEL-NEXT:  .LBB7_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB7_3
+; GFX11-GISEL-NEXT:  .LBB7_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB7_3
+; GFX11-GISEL-NEXT:  .LBB7_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -108,17 +225,67 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_bitcmp0_b32 s0, 0
-; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB8_3
-; CHECK-NEXT:  .LBB8_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB8_3
-; CHECK-NEXT:  .LBB8_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_bitcmp0_b32 s0, 0
+; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB8_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB8_3
+; CHECK-SDAG-NEXT:  .LBB8_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB8_3
+; CHECK-SDAG-NEXT:  .LBB8_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX10-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB8_3
+; GFX10-NEXT:  .LBB8_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB8_3
+; GFX10-NEXT:  .LBB8_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX11-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB8_3
+; GFX11-NEXT:  .LBB8_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB8_3
+; GFX11-NEXT:  .LBB8_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB8_3
+; GFX10-GISEL-NEXT:  .LBB8_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB8_3
+; GFX10-GISEL-NEXT:  .LBB8_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB8_3
+; GFX11-GISEL-NEXT:  .LBB8_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB8_3
+; GFX11-GISEL-NEXT:  .LBB8_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -130,18 +297,71 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB9_2
-; CHECK-NEXT:  ; %bb.1: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB9_3
-; CHECK-NEXT:  .LBB9_2: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB9_3
-; CHECK-NEXT:  .LBB9_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB9_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB9_3
+; CHECK-SDAG-NEXT:  .LBB9_2: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB9_3
+; CHECK-SDAG-NEXT:  .LBB9_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX10-NEXT:  ; %bb.1: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB9_3
+; GFX10-NEXT:  .LBB9_2: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB9_3
+; GFX10-NEXT:  .LBB9_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX11-NEXT:  ; %bb.1: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB9_3
+; GFX11-NEXT:  .LBB9_2: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB9_3
+; GFX11-NEXT:  .LBB9_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB9_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB9_3
+; GFX10-GISEL-NEXT:  .LBB9_2: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB9_3
+; GFX10-GISEL-NEXT:  .LBB9_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB9_3
+; GFX11-GISEL-NEXT:  .LBB9_2: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB9_3
+; GFX11-GISEL-NEXT:  .LBB9_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -153,19 +373,76 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_bitcmp1_b32 s0, 0
-; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT:    s_cbranch_vccnz .LBB10_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB10_3
-; CHECK-NEXT:  .LBB10_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB10_3
-; CHECK-NEXT:  .LBB10_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s0, 0
+; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB10_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB10_3
+; CHECK-SDAG-NEXT:  .LBB10_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB10_3
+; CHECK-SDAG-NEXT:  .LBB10_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB10_3
+; GFX10-NEXT:  .LBB10_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB10_3
+; GFX10-NEXT:  .LBB10_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB10_3
+; GFX11-NEXT:  .LBB10_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB10_3
+; GFX11-NEXT:  .LBB10_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB10_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB10_3
+; GFX10-GISEL-NEXT:  .LBB10_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB10_3
+; GFX10-GISEL-NEXT:  .LBB10_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB10_3
+; GFX11-GISEL-NEXT:  .LBB10_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB10_3
+; GFX11-GISEL-NEXT:  .LBB10_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -177,17 +454,66 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB11_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB11_3
-; CHECK-NEXT:  .LBB11_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB11_3
-; CHECK-NEXT:  .LBB11_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB11_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB11_3
+; CHECK-SDAG-NEXT:  .LBB11_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB11_3
+; CHECK-SDAG-NEXT:  .LBB11_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB11_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB11_3
+; GFX10-NEXT:  .LBB11_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB11_3
+; GFX10-NEXT:  .LBB11_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB11_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB11_3
+; GFX11-NEXT:  .LBB11_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB11_3
+; GFX11-NEXT:  .LBB11_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT:    s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB11_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB11_3
+; GFX10-GISEL-NEXT:  .LBB11_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB11_3
+; GFX10-GISEL-NEXT:  .LBB11_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB11_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB11_3
+; GFX11-GISEL-NEXT:  .LBB11_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB11_3
+; GFX11-GISEL-NEXT:  .LBB11_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -199,17 +525,65 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_cmp_gt_u32 s0, 11
-; CHECK-NEXT:    s_cbranch_scc1 .LBB12_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB12_3
-; CHECK-NEXT:  .LBB12_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB12_3
-; CHECK-NEXT:  .LBB12_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB12_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB12_3
+; CHECK-SDAG-NEXT:  .LBB12_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB12_3
+; CHECK-SDAG-NEXT:  .LBB12_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX10-NEXT:    s_cbranch_scc1 .LBB12_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB12_3
+; GFX10-NEXT:  .LBB12_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB12_3
+; GFX10-NEXT:  .LBB12_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX11-NEXT:    s_cbranch_scc1 .LBB12_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB12_3
+; GFX11-NEXT:  .LBB12_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB12_3
+; GFX11-NEXT:  .LBB12_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB12_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB12_3
+; GFX10-GISEL-NEXT:  .LBB12_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB12_3
+; GFX10-GISEL-NEXT:  .LBB12_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB12_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB12_3
+; GFX11-GISEL-NEXT:  .LBB12_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB12_3
+; GFX11-GISEL-NEXT:  .LBB12_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -221,17 +595,66 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB13_2
-; CHECK-NEXT:  ; %bb.1: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB13_3
-; CHECK-NEXT:  .LBB13_2: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB13_3
-; CHECK-NEXT:  .LBB13_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB13_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB13_3
+; CHECK-SDAG-NEXT:  .LBB13_2: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB13_3
+; CHECK-SDAG-NEXT:  .LBB13_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB13_2
+; GFX10-NEXT:  ; %bb.1: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB13_3
+; GFX10-NEXT:  .LBB13_2: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB13_3
+; GFX10-NEXT:  .LBB13_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB13_2
+; GFX11-NEXT:  ; %bb.1: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB13_3
+; GFX11-NEXT:  .LBB13_2: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB13_3
+; GFX11-NEXT:  .LBB13_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB13_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB13_3
+; GFX10-GISEL-NEXT:  .LBB13_2: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB13_3
+; GFX10-GISEL-NEXT:  .LBB13_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB13_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB13_3
+; GFX11-GISEL-NEXT:  .LBB13_2: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB13_3
+; GFX11-GISEL-NEXT:  .LBB13_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -243,6 +666,17 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
+; CHECK-BOTH-LABEL: branch_uniform_ballot_eq_zero_compare:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB14_2
+; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT:    s_branch .LBB14_3
+; CHECK-BOTH-NEXT:  .LBB14_2: ; %false
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT:    s_branch .LBB14_3
+; CHECK-BOTH-NEXT:  .LBB14_3:
 ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -265,19 +699,76 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT:    s_cbranch_vccz .LBB15_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB15_3
-; CHECK-NEXT:  .LBB15_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB15_3
-; CHECK-NEXT:  .LBB15_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_and:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB15_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB15_3
+; CHECK-SDAG-NEXT:  .LBB15_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB15_3
+; CHECK-SDAG-NEXT:  .LBB15_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT:    s_cbranch_vccz .LBB15_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB15_3
+; GFX10-NEXT:  .LBB15_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB15_3
+; GFX10-NEXT:  .LBB15_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT:    s_cbranch_vccz .LBB15_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB15_3
+; GFX11-NEXT:  .LBB15_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB15_3
+; GFX11-NEXT:  .LBB15_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB15_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB15_3
+; GFX10-GISEL-NEXT:  .LBB15_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB15_3
+; GFX10-GISEL-NEXT:  .LBB15_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB15_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB15_3
+; GFX11-GISEL-NEXT:  .LBB15_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB15_3
+; GFX11-GISEL-NEXT:  .LBB15_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -291,22 +782,90 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_cmp_gt_u32 s0, 11
-; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-NEXT:    s_cmp_lt_u32 s1, 35
-; CHECK-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-NEXT:    s_or_b32 s0, s0, s1
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT:    s_cbranch_vccnz .LBB16_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB16_3
-; CHECK-NEXT:  .LBB16_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB16_3
-; CHECK-NEXT:  .LBB16_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_and:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT:    s_cmp_lt_u32 s1, 35
+; CHECK-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT:    s_or_b32 s0, s0, s1
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB16_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB16_3
+; CHECK-SDAG-NEXT:  .LBB16_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB16_3
+; CHECK-SDAG-NEXT:  .LBB16_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX10-NEXT:    s_cmp_lt_u32 s1, 35
+; GFX10-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX10-NEXT:    s_or_b32 s0, s0, s1
+; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT:    s_cbranch_vccnz .LBB16_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB16_3
+; GFX10-NEXT:  .LBB16_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB16_3
+; GFX10-NEXT:  .LBB16_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-NEXT:    s_cmp_lt_u32 s1, 35
+; GFX11-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX11-NEXT:    s_or_b32 s0, s0, s1
+; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT:    s_cbranch_vccnz .LBB16_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB16_3
+; GFX11-NEXT:  .LBB16_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB16_3
+; GFX11-NEXT:  .LBB16_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT:    s_cmp_le_u32 s1, 34
+; GFX10-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB16_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB16_3
+; GFX10-GISEL-NEXT:  .LBB16_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB16_3
+; GFX10-GISEL-NEXT:  .LBB16_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT:    s_cmp_lt_u32 s1, 35
+; GFX11-GISEL-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT:    s_or_b32 s0, s0, s1
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB16_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB16_3
+; GFX11-GISEL-NEXT:  .LBB16_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB16_3
+; GFX11-GISEL-NEXT:  .LBB16_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -320,19 +879,75 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT:    s_cbranch_vccz .LBB17_2
-; CHECK-NEXT:  ; %bb.1: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB17_3
-; CHECK-NEXT:  .LBB17_2: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB17_3
-; CHECK-NEXT:  .LBB17_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_and:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB17_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB17_3
+; CHECK-SDAG-NEXT:  .LBB17_2: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB17_3
+; CHECK-SDAG-NEXT:  .LBB17_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT:    s_cbranch_vccz .LBB17_2
+; GFX10-NEXT:  ; %bb.1: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB17_3
+; GFX10-NEXT:  .LBB17_2: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB17_3
+; GFX10-NEXT:  .LBB17_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT:    s_cbranch_vccz .LBB17_2
+; GFX11-NEXT:  ; %bb.1: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB17_3
+; GFX11-NEXT:  .LBB17_2: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB17_3
+; GFX11-NEXT:  .LBB17_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB17_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB17_3
+; GFX10-GISEL-NEXT:  .LBB17_2: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB17_3
+; GFX10-GISEL-NEXT:  .LBB17_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB17_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB17_3
+; GFX11-GISEL-NEXT:  .LBB17_2: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB17_3
+; GFX11-GISEL-NEXT:  .LBB17_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -346,22 +961,90 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
-; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-NEXT:    s_cmp_gt_u32 s1, 34
-; CHECK-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-NEXT:    s_and_b32 s0, s0, s1
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT:    s_cbranch_vccnz .LBB18_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB18_3
-; CHECK-NEXT:  .LBB18_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB18_3
-; CHECK-NEXT:  .LBB18_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_and:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s1, 34
+; CHECK-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT:    s_and_b32 s0, s0, s1
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB18_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB18_3
+; CHECK-SDAG-NEXT:  .LBB18_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB18_3
+; CHECK-SDAG-NEXT:  .LBB18_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_cmp_lt_u32 s0, 12
+; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX10-NEXT:    s_cmp_gt_u32 s1, 34
+; GFX10-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX10-NEXT:    s_and_b32 s0, s0, s1
+; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT:    s_cbranch_vccnz .LBB18_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB18_3
+; GFX10-NEXT:  .LBB18_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB18_3
+; GFX10-NEXT:  .LBB18_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_cmp_lt_u32 s0, 12
+; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-NEXT:    s_cmp_gt_u32 s1, 34
+; GFX11-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX11-NEXT:    s_and_b32 s0, s0, s1
+; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT:    s_cbranch_vccnz .LBB18_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB18_3
+; GFX11-NEXT:  .LBB18_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB18_3
+; GFX11-NEXT:  .LBB18_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT:    s_cmp_gt_u32 s1, 34
+; GFX10-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, s1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB18_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB18_3
+; GFX10-GISEL-NEXT:  .LBB18_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB18_3
+; GFX10-GISEL-NEXT:  .LBB18_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
+; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s1, 34
+; GFX11-GISEL-NEXT:    s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, s1
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB18_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB18_3
+; GFX11-GISEL-NEXT:  .LBB18_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB18_3
+; GFX11-GISEL-NEXT:  .LBB18_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -375,18 +1058,72 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
-; CHECK-NEXT:    s_cmp_lt_i32 s0, 23
-; CHECK-NEXT:    s_cbranch_scc1 .LBB19_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB19_3
-; CHECK-NEXT:  .LBB19_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB19_3
-; CHECK-NEXT:  .LBB19_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_sgt_N_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
+; CHECK-SDAG-NEXT:    s_cmp_lt_i32 s0, 23
+; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB19_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB19_3
+; CHECK-SDAG-NEXT:  .LBB19_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB19_3
+; CHECK-SDAG-NEXT:  .LBB19_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
+; GFX10-NEXT:    s_cmp_lt_i32 s0, 23
+; GFX10-NEXT:    s_cbranch_scc1 .LBB19_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB19_3
+; GFX10-NEXT:  .LBB19_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB19_3
+; GFX10-NEXT:  .LBB19_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-NEXT:    s_cmp_lt_i32 s0, 23
+; GFX11-NEXT:    s_cbranch_scc1 .LBB19_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB19_3
+; GFX11-NEXT:  .LBB19_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB19_3
+; GFX11-NEXT:  .LBB19_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT:    s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT:    s_cmp_le_i32 s0, 22
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB19_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB19_3
+; GFX10-GISEL-NEXT:  .LBB19_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB19_3
+; GFX10-GISEL-NEXT:  .LBB19_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-GISEL-NEXT:    s_cmp_lt_i32 s0, 23
+; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB19_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB19_3
+; GFX11-GISEL-NEXT:  .LBB19_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB19_3
+; GFX11-GISEL-NEXT:  .LBB19_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %bc = icmp sgt i32 %ballot, 22
@@ -400,6 +1137,19 @@ false:
 declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
 
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT:    s_cbranch_vccnz .LBB20_2
+; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT:    s_branch .LBB20_3
+; CHECK-BOTH-NEXT:  .LBB20_2: ; %false
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT:    s_branch .LBB20_3
+; CHECK-BOTH-NEXT:  .LBB20_3:
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -440,6 +1190,22 @@ define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 in
 ;   s_cselect_b32 s0, s0, 0
 ;   s_and_b32 s0, s0, exec_lo
 ; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec.
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT:    s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB21_2
+; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT:    s_branch .LBB21_3
+; CHECK-BOTH-NEXT:  .LBB21_2: ; %false
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT:    s_branch .LBB21_3
+; CHECK-BOTH-NEXT:  .LBB21_3:
 ; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -469,6 +1235,19 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT:    s_cbranch_vccnz .LBB22_2
+; CHECK-BOTH-NEXT:  ; %bb.1: ; %false
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT:    s_branch .LBB22_3
+; CHECK-BOTH-NEXT:  .LBB22_2: ; %true
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT:    s_branch .LBB22_3
+; CHECK-BOTH-NEXT:  .LBB22_3:
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -495,6 +1274,22 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT:    s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB23_2
+; CHECK-BOTH-NEXT:  ; %bb.1: ; %false
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT:    s_branch .LBB23_3
+; CHECK-BOTH-NEXT:  .LBB23_2: ; %true
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT:    s_branch .LBB23_3
+; CHECK-BOTH-NEXT:  .LBB23_3:
 ; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -573,6 +1368,58 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: non_cst_non_compare_input:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
+; GFX10-GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %B
+; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX10-GISEL-NEXT:    s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT:    ; implicit-def: $vgpr2
+; GFX10-GISEL-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT:  ; %bb.2: ; %Flow
+; GFX10-GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
+; GFX10-GISEL-NEXT:  ; %bb.3: ; %A
+; GFX10-GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX10-GISEL-NEXT:    s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT:  ; %bb.4: ; %exit
+; GFX10-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX10-GISEL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: non_cst_non_compare_input:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
+; GFX11-GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX11-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v3
+; GFX11-GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %B
+; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e64 s0, 2, v2
+; GFX11-GISEL-NEXT:    ; implicit-def: $vgpr2
+; GFX11-GISEL-NEXT:  ; %bb.2: ; %Flow
+; GFX11-GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
+; GFX11-GISEL-NEXT:  ; %bb.3: ; %A
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-GISEL-NEXT:    s_and_not1_b32 s0, s0, exec_lo
+; GFX11-GISEL-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
+; GFX11-GISEL-NEXT:    s_or_b32 s0, s0, s2
+; GFX11-GISEL-NEXT:  ; %bb.4: ; %exit
+; GFX11-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e64 s0, 0, v2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-GISEL-NEXT:    s_endpgm
 entry:
   %cmp = icmp eq i32 %cond, 0
   br i1 %cmp, label %A, label %B
@@ -608,6 +1455,22 @@ define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
 ; GFX11-NEXT:    v_mov_b16_e32 v1.l, v2.l
 ; GFX11-NEXT:    v_cmp_gt_f32_e64 s0, v1, v2
 ; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_bfloats:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_bfloats:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX11-GISEL-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX11-GISEL-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX11-GISEL-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX11-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, v1, v2
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %cmp = fcmp ogt bfloat %x, %y
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot

>From 6eee23f1d1831a23d2fdb337b17aaea40c640e8d Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 09:50:36 -0500
Subject: [PATCH 2/4] Reformat.

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     | 44 +++++++++----------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 05ba285bd1379..48a29320a998c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1019,33 +1019,29 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
 
   addRulesForIOpcs({amdgcn_icmp})
-    .Any({{UniS64, _, S1},
-          {{Sgpr64}, {IntrId, Vcc, Vcc}}})
-    .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
-    .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
-    .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
-    .Any({{UniS32, _, S1},
-          {{Sgpr32}, {IntrId, Vcc, Vcc}}})
-    .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
-    .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
-    .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
 
   addRulesForIOpcs({amdgcn_fcmp})
-    .Any({{UniS64, _, S1},
-          {{Sgpr64}, {IntrId, Vcc, Vcc}}})
-    .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
-    .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
-    .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
-    .Any({{UniS32, _, S1},
-          {{Sgpr32}, {IntrId, Vcc, Vcc}}})
-    .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
-    .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
-    .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
 
   addRulesForIOpcs({amdgcn_ballot}, Standard)
-    .Uni(S64, {{Sgpr64}, {None, Vcc}})
-    .Uni(S32, {{Sgpr32}, {None, Vcc}});
+      .Uni(S64, {{Sgpr64}, {None, Vcc}})
+      .Uni(S32, {{Sgpr32}, {None, Vcc}});
 
 } // end initialize rules

>From 5fffb40a7a050d49cf5a02665a86daefdd8871c0 Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Wed, 17 Dec 2025 05:33:23 -0500
Subject: [PATCH 3/4] Update tests.

---
 .../GlobalISel/regbankselect-amdgcn.fcmp.mir  |   4 +-
 .../GlobalISel/regbankselect-amdgcn.icmp.mir  |   4 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll    |   4 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll    | 170 ++++++++++++------
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll    |  12 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll    |  15 +-
 6 files changed, 122 insertions(+), 87 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
index be59d356af05e..00c2a6102de38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs  -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
 
 ---
 name: fcmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
index 434cc138f3704..2ba4a3c6cf043 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
 
 ---
 name: icmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 87a9ba30490a0..edda79b813378 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX10 %s
 
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
 
 declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
 declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index 9e4824694e76a..67a973d3b0e07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
 
 declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
 declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
@@ -26,17 +26,29 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_fcmp_f32_oeq_with_fabs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_cmp_eq_f32_e64 s[2:3], s2, |v0|
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f32_oeq_with_fabs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT:    v_cmp_eq_f32_e64 s[2:3], s2, |v0|
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f32_oeq_with_fabs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], v0, |s3|
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_fcmp_f32_oeq_with_fabs:
 ; VI-SDAG:       ; %bb.0:
@@ -55,8 +67,8 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], s2, |v0|
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], v0, |s3|
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -82,17 +94,29 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT:    v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], |v0|, |s3|
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
 ; VI-SDAG:       ; %bb.0:
@@ -111,8 +135,8 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_cmp_eq_f32_e64 s[2:3], |v0|, |s3|
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1838,19 +1862,33 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_fcmp_f16_oeq_with_fabs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_cmp_eq_f16_e64 s[2:3], s2, |v0|
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f16_oeq_with_fabs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT:    v_cmp_eq_f16_e64 s[2:3], s2, |v0|
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_oeq_with_fabs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], v0, |s3|
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_fcmp_f16_oeq_with_fabs:
 ; VI-SDAG:       ; %bb.0:
@@ -1873,8 +1911,8 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half
 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    s_lshr_b32 s3, s2, 16
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], s2, |v0|
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], v0, |s3|
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1905,19 +1943,33 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT:    v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], |v0|, |s3|
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
 ; VI-SDAG:       ; %bb.0:
@@ -1940,8 +1992,8 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(
 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    s_lshr_b32 s3, s2, 16
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_cmp_eq_f16_e64 s[2:3], |v0|, |s3|
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index e2b068e2e9105..f8ccd40d37bcc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -2,16 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
 
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-
-; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
-;  They are allowed in DAGISel but we (intentionally) don't support them
-;  in GlobalISel.
-
-; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
 
 declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index 366b71bae75c9..e6278cf2a52ee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -3,18 +3,9 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
 
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=fiji < %s 2>%t | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s 2>%t | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-
-; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
-;  They are allowed in DAGISel but we (intentionally) don't support them
-;  in GlobalISel.
-
-; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
 
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0

>From a5ec298611216f48c6230583293e8312212e151d Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Tue, 6 Jan 2026 07:03:49 -0500
Subject: [PATCH 4/4] Make various changes requested in review feedback.

---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp    |   15 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   16 +-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h       |    1 +
 .../GlobalISel/regbankselect-amdgcn.fcmp.mir  |    3 +-
 .../GlobalISel/regbankselect-amdgcn.icmp.mir  |    3 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll  | 1183 +++--------------
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll    |    4 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll    |    6 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll    |   92 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll    |  148 ++-
 10 files changed, 354 insertions(+), 1117 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index cc31d7d5c55ac..9f57b2cd94a7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -934,6 +934,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32Trunc:
   case Sgpr32AExt:
   case Sgpr32AExtBoolInReg:
+  case Vgpr32AExtBoolInReg:
   case Sgpr32SExt:
   case Sgpr32ZExt:
   case UniInVgprS32:
@@ -1084,6 +1085,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32Trunc:
   case Sgpr32AExt:
   case Sgpr32AExtBoolInReg:
+  case Vgpr32AExtBoolInReg:
   case Sgpr32SExt:
   case Sgpr32ZExt:
     return SgprRB;
@@ -1380,6 +1382,19 @@ bool RegBankLegalizeHelper::applyMappingSrc(
       Op.setReg(BoolInReg.getReg(0));
       break;
     }
+    case Vgpr32AExtBoolInReg: {
+      // Note: this ext allows S1, and it is meant to be combined away.
+      assert(Ty.getSizeInBits() == 1);
+      assert(RB == SgprRB);
+      auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
+      // Zext SgprS1 is not legal, make AND with 1 instead. This instruction is
+      // usually meant to be combined away in AMDGPURegBankCombiner.
+      auto Cst1 = B.buildConstant(SgprRB_S32, 1);
+      auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
+
+      Op.setReg(B.buildCopy(VgprRB_S32, BoolInReg).getReg(0));
+      break;
+    }
     case Sgpr32SExt: {
       assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
       assert(RB == SgprRB);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 48a29320a998c..49ac8d208b461 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1019,29 +1019,29 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
 
   addRulesForIOpcs({amdgcn_icmp})
-      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S1},
+            {{Sgpr64}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
       .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
       .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
       .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
 
-      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S1},
+            {{Sgpr32}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
       .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
       .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
       .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
 
   addRulesForIOpcs({amdgcn_fcmp})
-      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S1},
+            {{Sgpr64}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
       .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
       .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
       .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
 
-      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S1},
+            {{Sgpr32}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
       .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
       .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
       .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
 
-  addRulesForIOpcs({amdgcn_ballot}, Standard)
-      .Uni(S64, {{Sgpr64}, {None, Vcc}})
-      .Uni(S32, {{Sgpr32}, {None, Vcc}});
-
 } // end initialize rules
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1ac117304b76f..67e9f510640c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -204,6 +204,7 @@ enum RegBankLLTMappingApplyID {
   // Src only modifiers: extends
   Sgpr32AExt,
   Sgpr32AExtBoolInReg,
+  Vgpr32AExtBoolInReg,
   Sgpr32SExt,
   Sgpr32ZExt,
   Vgpr32SExt,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
index 00c2a6102de38..685a712bbcd18 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs  -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
 
 ---
 name: fcmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
index 2ba4a3c6cf043..5c181562954d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
 
 ---
 name: icmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 3b93164539df7..c1f3a12dba578 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,8 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
 
 declare i32 @llvm.amdgcn.ballot.i32(i1)
 declare i32 @llvm.ctpop.i32(i32)
@@ -10,10 +8,6 @@ declare i32 @llvm.ctpop.i32(i32)
 ; Test ballot(0)
 
 define amdgpu_cs i32 @constant_false() {
-; CHECK-BOTH-LABEL: constant_false:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 0
-; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: constant_false:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_mov_b32 s0, 0
@@ -25,30 +19,10 @@ define amdgpu_cs i32 @constant_false() {
 ; Test ballot(1)
 
 define amdgpu_cs i32 @constant_true() {
-; CHECK-SDAG-LABEL: constant_true:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, exec_lo
-; CHECK-SDAG-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: constant_true:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_mov_b32 s0, exec_lo
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-LABEL: constant_true:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_mov_b32 s0, exec_lo
-; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: constant_true:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_and_b32 s0, exec_lo, exec_lo
-; GFX10-GISEL-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: constant_true:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX11-GISEL-NEXT:    ; return to shader part epilog
+; CHECK-LABEL: constant_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
   ret i32 %ballot
 }
@@ -56,11 +30,6 @@ define amdgpu_cs i32 @constant_true() {
 ; Test ballot of a non-comparison operation
 
 define amdgpu_cs i32 @non_compare(i32 %x) {
-; CHECK-BOTH-LABEL: non_compare:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-BOTH-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: non_compare:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -74,10 +43,6 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
 ; Test ballot of comparisons
 
 define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
-; CHECK-BOTH-LABEL: compare_ints:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
-; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_ints:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
@@ -88,40 +53,16 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
 }
 
 define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-SDAG-LABEL: compare_int_with_constant:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-SDAG-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: compare_int_with_constant:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX11-LABEL: compare_int_with_constant:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: compare_int_with_constant:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
-; GFX10-GISEL-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: compare_int_with_constant:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX11-GISEL-NEXT:    ; return to shader part epilog
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
 }
 
 define amdgpu_cs i32 @compare_floats(float %x, float %y) {
-; CHECK-BOTH-LABEL: compare_floats:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
-; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_floats:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
@@ -132,11 +73,6 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
-; CHECK-BOTH-LABEL: ctpop_of_ballot:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
-; CHECK-BOTH-NEXT:    s_bcnt1_i32_b32 s0, vcc_lo
-; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: ctpop_of_ballot:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -149,71 +85,18 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB7_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB7_3
-; CHECK-SDAG-NEXT:  .LBB7_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB7_3
-; CHECK-SDAG-NEXT:  .LBB7_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT:    s_cbranch_vccz .LBB7_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB7_3
-; GFX10-NEXT:  .LBB7_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB7_3
-; GFX10-NEXT:  .LBB7_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_cbranch_vccz .LBB7_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB7_3
-; GFX11-NEXT:  .LBB7_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB7_3
-; GFX11-NEXT:  .LBB7_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT:    s_cmp_eq_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB7_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB7_3
-; GFX10-GISEL-NEXT:  .LBB7_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB7_3
-; GFX10-GISEL-NEXT:  .LBB7_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB7_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB7_3
-; GFX11-GISEL-NEXT:  .LBB7_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB7_3
-; GFX11-GISEL-NEXT:  .LBB7_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-NEXT:    s_cbranch_vccz .LBB7_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB7_3
+; CHECK-NEXT:  .LBB7_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB7_3
+; CHECK-NEXT:  .LBB7_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -225,67 +108,17 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_bitcmp0_b32 s0, 0
-; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB8_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB8_3
-; CHECK-SDAG-NEXT:  .LBB8_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB8_3
-; CHECK-SDAG-NEXT:  .LBB8_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_bitcmp0_b32 s0, 0
-; GFX10-NEXT:    s_cbranch_scc1 .LBB8_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB8_3
-; GFX10-NEXT:  .LBB8_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB8_3
-; GFX10-NEXT:  .LBB8_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_bitcmp0_b32 s0, 0
-; GFX11-NEXT:    s_cbranch_scc1 .LBB8_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB8_3
-; GFX11-NEXT:  .LBB8_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB8_3
-; GFX11-NEXT:  .LBB8_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB8_3
-; GFX10-GISEL-NEXT:  .LBB8_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB8_3
-; GFX10-GISEL-NEXT:  .LBB8_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_bitcmp0_b32 s0, 0
-; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB8_3
-; GFX11-GISEL-NEXT:  .LBB8_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB8_3
-; GFX11-GISEL-NEXT:  .LBB8_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_bitcmp0_b32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB8_3
+; CHECK-NEXT:  .LBB8_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB8_3
+; CHECK-NEXT:  .LBB8_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -297,71 +130,18 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB9_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB9_3
-; CHECK-SDAG-NEXT:  .LBB9_2: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB9_3
-; CHECK-SDAG-NEXT:  .LBB9_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT:    s_cbranch_vccz .LBB9_2
-; GFX10-NEXT:  ; %bb.1: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB9_3
-; GFX10-NEXT:  .LBB9_2: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB9_3
-; GFX10-NEXT:  .LBB9_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_cbranch_vccz .LBB9_2
-; GFX11-NEXT:  ; %bb.1: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB9_3
-; GFX11-NEXT:  .LBB9_2: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB9_3
-; GFX11-NEXT:  .LBB9_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB9_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB9_3
-; GFX10-GISEL-NEXT:  .LBB9_2: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB9_3
-; GFX10-GISEL-NEXT:  .LBB9_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB9_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB9_3
-; GFX11-GISEL-NEXT:  .LBB9_2: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB9_3
-; GFX11-GISEL-NEXT:  .LBB9_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-NEXT:    s_cbranch_vccz .LBB9_2
+; CHECK-NEXT:  ; %bb.1: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB9_3
+; CHECK-NEXT:  .LBB9_2: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB9_3
+; CHECK-NEXT:  .LBB9_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -373,76 +153,19 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s0, 0
-; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB10_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB10_3
-; CHECK-SDAG-NEXT:  .LBB10_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB10_3
-; CHECK-SDAG-NEXT:  .LBB10_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_bitcmp1_b32 s0, 0
-; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_vccnz .LBB10_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB10_3
-; GFX10-NEXT:  .LBB10_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB10_3
-; GFX10-NEXT:  .LBB10_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_bitcmp1_b32 s0, 0
-; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT:    s_cbranch_vccnz .LBB10_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB10_3
-; GFX11-NEXT:  .LBB10_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB10_3
-; GFX11-NEXT:  .LBB10_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB10_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB10_3
-; GFX10-GISEL-NEXT:  .LBB10_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB10_3
-; GFX10-GISEL-NEXT:  .LBB10_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_bitcmp1_b32 s0, 0
-; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB10_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB10_3
-; GFX11-GISEL-NEXT:  .LBB10_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB10_3
-; GFX11-GISEL-NEXT:  .LBB10_3:
+; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_bitcmp1_b32 s0, 0
+; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT:    s_cbranch_vccnz .LBB10_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB10_3
+; CHECK-NEXT:  .LBB10_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB10_3
+; CHECK-NEXT:  .LBB10_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -454,66 +177,17 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB11_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB11_3
-; CHECK-SDAG-NEXT:  .LBB11_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB11_3
-; CHECK-SDAG-NEXT:  .LBB11_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT:    s_cbranch_vccz .LBB11_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB11_3
-; GFX10-NEXT:  .LBB11_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB11_3
-; GFX10-NEXT:  .LBB11_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT:    s_cbranch_vccz .LBB11_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB11_3
-; GFX11-NEXT:  .LBB11_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB11_3
-; GFX11-NEXT:  .LBB11_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT:    s_cmp_eq_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB11_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB11_3
-; GFX10-GISEL-NEXT:  .LBB11_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB11_3
-; GFX10-GISEL-NEXT:  .LBB11_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB11_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB11_3
-; GFX11-GISEL-NEXT:  .LBB11_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB11_3
-; GFX11-GISEL-NEXT:  .LBB11_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    s_cbranch_vccz .LBB11_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB11_3
+; CHECK-NEXT:  .LBB11_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB11_3
+; CHECK-NEXT:  .LBB11_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -525,65 +199,17 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s0, 11
-; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB12_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB12_3
-; CHECK-SDAG-NEXT:  .LBB12_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB12_3
-; CHECK-SDAG-NEXT:  .LBB12_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX10-NEXT:    s_cbranch_scc1 .LBB12_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB12_3
-; GFX10-NEXT:  .LBB12_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB12_3
-; GFX10-NEXT:  .LBB12_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX11-NEXT:    s_cbranch_scc1 .LBB12_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB12_3
-; GFX11-NEXT:  .LBB12_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB12_3
-; GFX11-NEXT:  .LBB12_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_cmp_ge_u32 s0, 12
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB12_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB12_3
-; GFX10-GISEL-NEXT:  .LBB12_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB12_3
-; GFX10-GISEL-NEXT:  .LBB12_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB12_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB12_3
-; GFX11-GISEL-NEXT:  .LBB12_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB12_3
-; GFX11-GISEL-NEXT:  .LBB12_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_cmp_gt_u32 s0, 11
+; CHECK-NEXT:    s_cbranch_scc1 .LBB12_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB12_3
+; CHECK-NEXT:  .LBB12_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB12_3
+; CHECK-NEXT:  .LBB12_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -595,66 +221,17 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB13_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB13_3
-; CHECK-SDAG-NEXT:  .LBB13_2: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB13_3
-; CHECK-SDAG-NEXT:  .LBB13_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT:    s_cbranch_vccz .LBB13_2
-; GFX10-NEXT:  ; %bb.1: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB13_3
-; GFX10-NEXT:  .LBB13_2: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB13_3
-; GFX10-NEXT:  .LBB13_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT:    s_cbranch_vccz .LBB13_2
-; GFX11-NEXT:  ; %bb.1: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB13_3
-; GFX11-NEXT:  .LBB13_2: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB13_3
-; GFX11-NEXT:  .LBB13_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB13_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB13_3
-; GFX10-GISEL-NEXT:  .LBB13_2: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB13_3
-; GFX10-GISEL-NEXT:  .LBB13_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB13_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB13_3
-; GFX11-GISEL-NEXT:  .LBB13_2: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB13_3
-; GFX11-GISEL-NEXT:  .LBB13_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    s_cbranch_vccz .LBB13_2
+; CHECK-NEXT:  ; %bb.1: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB13_3
+; CHECK-NEXT:  .LBB13_2: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB13_3
+; CHECK-NEXT:  .LBB13_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -666,17 +243,6 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
-; CHECK-BOTH-LABEL: branch_uniform_ballot_eq_zero_compare:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB14_2
-; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT:    s_branch .LBB14_3
-; CHECK-BOTH-NEXT:  .LBB14_2: ; %false
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT:    s_branch .LBB14_3
-; CHECK-BOTH-NEXT:  .LBB14_3:
 ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -699,76 +265,19 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_and:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB15_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB15_3
-; CHECK-SDAG-NEXT:  .LBB15_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB15_3
-; CHECK-SDAG-NEXT:  .LBB15_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX10-NEXT:    s_cbranch_vccz .LBB15_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB15_3
-; GFX10-NEXT:  .LBB15_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB15_3
-; GFX10-NEXT:  .LBB15_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-NEXT:    s_cbranch_vccz .LBB15_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB15_3
-; GFX11-NEXT:  .LBB15_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB15_3
-; GFX11-NEXT:  .LBB15_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-GISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
-; GFX10-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB15_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB15_3
-; GFX10-GISEL-NEXT:  .LBB15_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB15_3
-; GFX10-GISEL-NEXT:  .LBB15_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB15_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB15_3
-; GFX11-GISEL-NEXT:  .LBB15_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB15_3
-; GFX11-GISEL-NEXT:  .LBB15_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT:    s_cbranch_vccz .LBB15_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB15_3
+; CHECK-NEXT:  .LBB15_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB15_3
+; CHECK-NEXT:  .LBB15_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -782,90 +291,22 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_and:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s0, 11
-; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT:    s_cmp_lt_u32 s1, 35
-; CHECK-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-SDAG-NEXT:    s_or_b32 s0, s0, s1
-; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB16_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB16_3
-; CHECK-SDAG-NEXT:  .LBB16_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB16_3
-; CHECK-SDAG-NEXT:  .LBB16_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX10-NEXT:    s_cmp_lt_u32 s1, 35
-; GFX10-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX10-NEXT:    s_or_b32 s0, s0, s1
-; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_vccnz .LBB16_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB16_3
-; GFX10-NEXT:  .LBB16_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB16_3
-; GFX10-NEXT:  .LBB16_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-NEXT:    s_cmp_lt_u32 s1, 35
-; GFX11-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX11-NEXT:    s_or_b32 s0, s0, s1
-; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT:    s_cbranch_vccnz .LBB16_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB16_3
-; GFX11-NEXT:  .LBB16_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB16_3
-; GFX11-NEXT:  .LBB16_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_cmp_ge_u32 s0, 12
-; GFX10-GISEL-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX10-GISEL-NEXT:    s_cmp_le_u32 s1, 34
-; GFX10-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s1
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB16_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB16_3
-; GFX10-GISEL-NEXT:  .LBB16_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB16_3
-; GFX10-GISEL-NEXT:  .LBB16_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s0, 11
-; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT:    s_cmp_lt_u32 s1, 35
-; GFX11-GISEL-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX11-GISEL-NEXT:    s_or_b32 s0, s0, s1
-; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB16_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB16_3
-; GFX11-GISEL-NEXT:  .LBB16_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB16_3
-; GFX11-GISEL-NEXT:  .LBB16_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_cmp_gt_u32 s0, 11
+; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-NEXT:    s_cmp_lt_u32 s1, 35
+; CHECK-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-NEXT:    s_or_b32 s0, s0, s1
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT:    s_cbranch_vccnz .LBB16_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB16_3
+; CHECK-NEXT:  .LBB16_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB16_3
+; CHECK-NEXT:  .LBB16_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -879,75 +320,19 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_and:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB17_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB17_3
-; CHECK-SDAG-NEXT:  .LBB17_2: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB17_3
-; CHECK-SDAG-NEXT:  .LBB17_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX10-NEXT:    s_cbranch_vccz .LBB17_2
-; GFX10-NEXT:  ; %bb.1: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB17_3
-; GFX10-NEXT:  .LBB17_2: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB17_3
-; GFX10-NEXT:  .LBB17_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-NEXT:    s_cbranch_vccz .LBB17_2
-; GFX11-NEXT:  ; %bb.1: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB17_3
-; GFX11-NEXT:  .LBB17_2: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB17_3
-; GFX11-NEXT:  .LBB17_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-GISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
-; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB17_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB17_3
-; GFX10-GISEL-NEXT:  .LBB17_2: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB17_3
-; GFX10-GISEL-NEXT:  .LBB17_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB17_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB17_3
-; GFX11-GISEL-NEXT:  .LBB17_2: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB17_3
-; GFX11-GISEL-NEXT:  .LBB17_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT:    s_cbranch_vccz .LBB17_2
+; CHECK-NEXT:  ; %bb.1: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB17_3
+; CHECK-NEXT:  .LBB17_2: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB17_3
+; CHECK-NEXT:  .LBB17_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -961,90 +346,22 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_and:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    s_cmp_lt_u32 s0, 12
-; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT:    s_cmp_gt_u32 s1, 34
-; CHECK-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-SDAG-NEXT:    s_and_b32 s0, s0, s1
-; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB18_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB18_3
-; CHECK-SDAG-NEXT:  .LBB18_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB18_3
-; CHECK-SDAG-NEXT:  .LBB18_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_cmp_lt_u32 s0, 12
-; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX10-NEXT:    s_cmp_gt_u32 s1, 34
-; GFX10-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX10-NEXT:    s_and_b32 s0, s0, s1
-; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_vccnz .LBB18_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB18_3
-; GFX10-NEXT:  .LBB18_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB18_3
-; GFX10-NEXT:  .LBB18_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_cmp_lt_u32 s0, 12
-; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-NEXT:    s_cmp_gt_u32 s1, 34
-; GFX11-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX11-NEXT:    s_and_b32 s0, s0, s1
-; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT:    s_cbranch_vccnz .LBB18_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB18_3
-; GFX11-NEXT:  .LBB18_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB18_3
-; GFX11-NEXT:  .LBB18_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
-; GFX10-GISEL-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX10-GISEL-NEXT:    s_cmp_gt_u32 s1, 34
-; GFX10-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, s1
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB18_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB18_3
-; GFX10-GISEL-NEXT:  .LBB18_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB18_3
-; GFX10-GISEL-NEXT:  .LBB18_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
-; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT:    s_cmp_gt_u32 s1, 34
-; GFX11-GISEL-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, s1
-; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB18_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB18_3
-; GFX11-GISEL-NEXT:  .LBB18_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB18_3
-; GFX11-GISEL-NEXT:  .LBB18_3:
+; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-NEXT:    s_cmp_gt_u32 s1, 34
+; CHECK-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-NEXT:    s_and_b32 s0, s0, s1
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT:    s_cbranch_vccnz .LBB18_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB18_3
+; CHECK-NEXT:  .LBB18_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB18_3
+; CHECK-NEXT:  .LBB18_3:
   %v1c = icmp ult i32 %v1, 12
   %v2c = icmp ugt i32 %v2, 34
   %c = and i1 %v1c, %v2c
@@ -1058,72 +375,18 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_sgt_N_compare:
-; CHECK-SDAG:       ; %bb.0:
-; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
-; CHECK-SDAG-NEXT:    s_cmp_lt_i32 s0, 23
-; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB19_2
-; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT:    s_branch .LBB19_3
-; CHECK-SDAG-NEXT:  .LBB19_2: ; %false
-; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT:    s_branch .LBB19_3
-; CHECK-SDAG-NEXT:  .LBB19_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
-; GFX10-NEXT:    s_cmp_lt_i32 s0, 23
-; GFX10-NEXT:    s_cbranch_scc1 .LBB19_2
-; GFX10-NEXT:  ; %bb.1: ; %true
-; GFX10-NEXT:    s_mov_b32 s0, 42
-; GFX10-NEXT:    s_branch .LBB19_3
-; GFX10-NEXT:  .LBB19_2: ; %false
-; GFX10-NEXT:    s_mov_b32 s0, 33
-; GFX10-NEXT:    s_branch .LBB19_3
-; GFX10-NEXT:  .LBB19_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
-; GFX11-NEXT:    s_cmp_lt_i32 s0, 23
-; GFX11-NEXT:    s_cbranch_scc1 .LBB19_2
-; GFX11-NEXT:  ; %bb.1: ; %true
-; GFX11-NEXT:    s_mov_b32 s0, 42
-; GFX11-NEXT:    s_branch .LBB19_3
-; GFX11-NEXT:  .LBB19_2: ; %false
-; GFX11-NEXT:    s_mov_b32 s0, 33
-; GFX11-NEXT:    s_branch .LBB19_3
-; GFX11-NEXT:  .LBB19_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_cmp_lt_u32 s0, 12
-; GFX10-GISEL-NEXT:    s_cselect_b32 s0, exec_lo, 0
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT:    s_cmp_le_i32 s0, 22
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB19_2
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT:    s_branch .LBB19_3
-; GFX10-GISEL-NEXT:  .LBB19_2: ; %false
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT:    s_branch .LBB19_3
-; GFX10-GISEL-NEXT:  .LBB19_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
-; GFX11-GISEL-NEXT:    s_cmp_lt_i32 s0, 23
-; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB19_2
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT:    s_branch .LBB19_3
-; GFX11-GISEL-NEXT:  .LBB19_2: ; %false
-; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT:    s_branch .LBB19_3
-; GFX11-GISEL-NEXT:  .LBB19_3:
+; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, s0, 12
+; CHECK-NEXT:    s_cmp_lt_i32 s0, 23
+; CHECK-NEXT:    s_cbranch_scc1 .LBB19_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB19_3
+; CHECK-NEXT:  .LBB19_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB19_3
+; CHECK-NEXT:  .LBB19_3:
   %c = icmp ult i32 %v, 12
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %bc = icmp sgt i32 %ballot, 22
@@ -1137,19 +400,6 @@ false:
 declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
 
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-BOTH-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-BOTH-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-BOTH-NEXT:    s_cbranch_vccnz .LBB20_2
-; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT:    s_branch .LBB20_3
-; CHECK-BOTH-NEXT:  .LBB20_2: ; %false
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT:    s_branch .LBB20_3
-; CHECK-BOTH-NEXT:  .LBB20_3:
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -1190,22 +440,6 @@ define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 in
 ;   s_cselect_b32 s0, s0, 0
 ;   s_and_b32 s0, s0, exec_lo
 ; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec.
-; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-BOTH-NEXT:    s_cmp_gt_u32 s1, 34
-; CHECK-BOTH-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, s1
-; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, exec_lo
-; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB21_2
-; CHECK-BOTH-NEXT:  ; %bb.1: ; %true
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT:    s_branch .LBB21_3
-; CHECK-BOTH-NEXT:  .LBB21_2: ; %false
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT:    s_branch .LBB21_3
-; CHECK-BOTH-NEXT:  .LBB21_3:
 ; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -1235,19 +469,6 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-BOTH-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-BOTH-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-BOTH-NEXT:    s_cbranch_vccnz .LBB22_2
-; CHECK-BOTH-NEXT:  ; %bb.1: ; %false
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT:    s_branch .LBB22_3
-; CHECK-BOTH-NEXT:  .LBB22_2: ; %true
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT:    s_branch .LBB22_3
-; CHECK-BOTH-NEXT:  .LBB22_3:
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -1274,22 +495,6 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
-; CHECK-BOTH:       ; %bb.0:
-; CHECK-BOTH-NEXT:    s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-BOTH-NEXT:    s_cmp_gt_u32 s1, 34
-; CHECK-BOTH-NEXT:    s_cselect_b32 s1, -1, 0
-; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, s1
-; CHECK-BOTH-NEXT:    s_and_b32 s0, s0, exec_lo
-; CHECK-BOTH-NEXT:    s_cbranch_scc1 .LBB23_2
-; CHECK-BOTH-NEXT:  ; %bb.1: ; %false
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT:    s_branch .LBB23_3
-; CHECK-BOTH-NEXT:  .LBB23_2: ; %true
-; CHECK-BOTH-NEXT:    s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT:    s_branch .LBB23_3
-; CHECK-BOTH-NEXT:  .LBB23_3:
 ; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
@@ -1368,58 +573,6 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
-;
-; GFX10-GISEL-LABEL: non_cst_non_compare_input:
-; GFX10-GISEL:       ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
-; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cselect_b32 s0, exec_lo, 0
-; GFX10-GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
-; GFX10-GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX10-GISEL-NEXT:  ; %bb.1: ; %B
-; GFX10-GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
-; GFX10-GISEL-NEXT:    s_andn2_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT:    ; implicit-def: $vgpr2
-; GFX10-GISEL-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
-; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s2
-; GFX10-GISEL-NEXT:  ; %bb.2: ; %Flow
-; GFX10-GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX10-GISEL-NEXT:  ; %bb.3: ; %A
-; GFX10-GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
-; GFX10-GISEL-NEXT:    s_andn2_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
-; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s2
-; GFX10-GISEL-NEXT:  ; %bb.4: ; %exit
-; GFX10-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX10-GISEL-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-GISEL-NEXT:    s_endpgm
-;
-; GFX11-GISEL-LABEL: non_cst_non_compare_input:
-; GFX11-GISEL:       ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
-; GFX11-GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX11-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v3
-; GFX11-GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX11-GISEL-NEXT:  ; %bb.1: ; %B
-; GFX11-GISEL-NEXT:    v_cmp_gt_u32_e64 s0, 2, v2
-; GFX11-GISEL-NEXT:    ; implicit-def: $vgpr2
-; GFX11-GISEL-NEXT:  ; %bb.2: ; %Flow
-; GFX11-GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX11-GISEL-NEXT:  ; %bb.3: ; %A
-; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-GISEL-NEXT:    s_and_not1_b32 s0, s0, exec_lo
-; GFX11-GISEL-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
-; GFX11-GISEL-NEXT:    s_or_b32 s0, s0, s2
-; GFX11-GISEL-NEXT:  ; %bb.4: ; %exit
-; GFX11-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e64 s0, 0, v2
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-GISEL-NEXT:    s_endpgm
 entry:
   %cmp = icmp eq i32 %cond, 0
   br i1 %cmp, label %A, label %B
@@ -1455,22 +608,6 @@ define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
 ; GFX11-NEXT:    v_mov_b16_e32 v1.l, v2.l
 ; GFX11-NEXT:    v_cmp_gt_f32_e64 s0, v1, v2
 ; GFX11-NEXT:    ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: compare_bfloats:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX10-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
-; GFX10-GISEL-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: compare_bfloats:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    v_mov_b16_e32 v2.l, 0
-; GFX11-GISEL-NEXT:    v_mov_b16_e32 v2.h, v1.l
-; GFX11-GISEL-NEXT:    v_mov_b16_e32 v1.h, v0.l
-; GFX11-GISEL-NEXT:    v_mov_b16_e32 v1.l, v2.l
-; GFX11-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, v1, v2
-; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %cmp = fcmp ogt bfloat %x, %y
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index edda79b813378..678ee5d9e95c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX10 %s
 
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
 
 declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
 declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index 67a973d3b0e07..80d77c27b3ec2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
 
 declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
 declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index f8ccd40d37bcc..7f79e60857d96 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
 
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
 
 declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
@@ -1609,33 +1609,65 @@ define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) {
 }
 
 define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) {
-; GFX11-LABEL: v_icmp_i1_ne0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_cmp_gt_u32 s2, 1
-; GFX11-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-NEXT:    s_cmp_gt_u32 s3, 2
-; GFX11-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s2, s3
-; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
-;
-; GFX10-LABEL: v_icmp_i1_ne0:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_cmp_gt_u32 s2, 1
-; GFX10-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX10-NEXT:    s_cmp_gt_u32 s3, 2
-; GFX10-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX10-NEXT:    s_and_b32 s2, s2, s3
-; GFX10-NEXT:    v_mov_b32_e32 v1, s2
-; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT:    s_endpgm
+; SDAG-GFX11-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; SDAG-GFX11-NEXT:    s_cselect_b32 s2, -1, 0
+; SDAG-GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; SDAG-GFX11-NEXT:    s_cselect_b32 s3, -1, 0
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG-GFX11-NEXT:    s_and_b32 s2, s2, s3
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    s_cmp_gt_u32 s2, 1
+; SDAG-GFX10-NEXT:    s_cselect_b32 s2, -1, 0
+; SDAG-GFX10-NEXT:    s_cmp_gt_u32 s3, 2
+; SDAG-GFX10-NEXT:    s_cselect_b32 s3, -1, 0
+; SDAG-GFX10-NEXT:    s_and_b32 s2, s2, s3
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; GISEL-GFX11-NEXT:    s_cselect_b32 s2, 1, 0
+; GISEL-GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; GISEL-GFX11-NEXT:    s_cselect_b32 s3, 1, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT:    s_and_b32 s2, s2, s3
+; GISEL-GFX11-NEXT:    v_cmp_ne_u32_e64 s2, s2, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    s_cmp_gt_u32 s2, 1
+; GISEL-GFX10-NEXT:    s_cselect_b32 s2, 1, 0
+; GISEL-GFX10-NEXT:    s_cmp_gt_u32 s3, 2
+; GISEL-GFX10-NEXT:    s_cselect_b32 s3, 1, 0
+; GISEL-GFX10-NEXT:    s_and_b32 s2, s2, s3
+; GISEL-GFX10-NEXT:    v_cmp_ne_u32_e64 s2, s2, 0
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
   %c0 = icmp ugt i32 %a, 1
   %c1 = icmp ugt i32 %b, 2
   %src = and i1 %c0, %c1
@@ -1670,3 +1702,5 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
 attributes #0 = { nounwind readnone convergent }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
+; GFX10: {{.*}}
+; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index e6278cf2a52ee..aca943eb6f839 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -3,9 +3,9 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
 
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
 
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
@@ -1877,52 +1877,103 @@ define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) {
 }
 
 define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) {
-; GFX11-LABEL: v_icmp_i1_ne0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_cmp_gt_u32 s2, 1
-; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX11-NEXT:    s_cmp_gt_u32 s3, 2
-; GFX11-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX11-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11-NEXT:    v_mov_b32_e32 v1, s3
-; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-NEXT:    s_endpgm
+; SDAG-GFX11-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; SDAG-GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SDAG-GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; SDAG-GFX11-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG-GFX11-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; SDAG-GFX11-NEXT:    s_endpgm
 ;
-; VI-LABEL: v_icmp_i1_ne0:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_gt_u32 s2, 1
-; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT:    s_cmp_gt_u32 s3, 2
-; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: v_icmp_i1_ne0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_cmp_gt_u32 s2, 1
-; GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX9-NEXT:    s_cmp_gt_u32 s3, 2
-; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT:    s_endpgm
+; SDAG-VI-LABEL: v_icmp_i1_ne0:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    s_cmp_gt_u32 s2, 1
+; SDAG-VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SDAG-VI-NEXT:    s_cmp_gt_u32 s3, 2
+; SDAG-VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; SDAG-VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    s_cmp_gt_u32 s2, 1
+; SDAG-GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SDAG-GFX9-NEXT:    s_cmp_gt_u32 s3, 2
+; SDAG-GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; SDAG-GFX9-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; GISEL-GFX11-NEXT:    s_cselect_b32 s2, 1, 0
+; GISEL-GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; GISEL-GFX11-NEXT:    s_cselect_b32 s3, 1, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT:    s_and_b32 s2, s2, s3
+; GISEL-GFX11-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i1_ne0:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    s_cmp_gt_u32 s2, 1
+; GISEL-VI-NEXT:    s_cselect_b32 s2, 1, 0
+; GISEL-VI-NEXT:    s_cmp_gt_u32 s3, 2
+; GISEL-VI-NEXT:    s_cselect_b32 s3, 1, 0
+; GISEL-VI-NEXT:    s_and_b32 s2, s2, s3
+; GISEL-VI-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    s_cmp_gt_u32 s2, 1
+; GISEL-GFX9-NEXT:    s_cselect_b32 s2, 1, 0
+; GISEL-GFX9-NEXT:    s_cmp_gt_u32 s3, 2
+; GISEL-GFX9-NEXT:    s_cselect_b32 s3, 1, 0
+; GISEL-GFX9-NEXT:    s_and_b32 s2, s2, s3
+; GISEL-GFX9-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
   %c0 = icmp ugt i32 %a, 1
   %c1 = icmp ugt i32 %b, 2
   %src = and i1 %c0, %c1
@@ -1966,3 +2017,4 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
 attributes #0 = { nounwind readnone convergent }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
+; VI: {{.*}}



More information about the llvm-commits mailing list