[llvm] AMDGPU: Add register bank legalize rules for amdgcn_icmp, amdgcn_fcmp and amdgcn_ballot. (PR #172017)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 06:52:24 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (anjenner)

<details>
<summary>Changes</summary>

I'm not entirely sure this is correct. I needed to change the source-operand mapping (SrcOpMapping, the second list in each rule) for the S1 rules for amdgcn_fcmp and amdgcn_icmp from {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg} to {IntrId, Vcc, Vcc}. The result does seem to work (the tests pass). There are some differences in the generated assembly between GFX10 and GFX11 for GlobalISel (gisel) that are not there for SelectionDAG (sdag); these don't seem to be related to this patch, and may be expected.

---

Patch is 54.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172017.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+26) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll (+1023-160) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..48a29320a998c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1018,4 +1018,30 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       // readfirstlaning just in case register is not in sgpr.
       .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
 
+  addRulesForIOpcs({amdgcn_icmp})
+      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+  addRulesForIOpcs({amdgcn_fcmp})
+      .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+      .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+      .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+      .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+  addRulesForIOpcs({amdgcn_ballot}, Standard)
+      .Uni(S64, {{Sgpr64}, {None, Vcc}})
+      .Uni(S32, {{Sgpr32}, {None, Vcc}});
+
 } // end initialize rules
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index c1f3a12dba578..3b93164539df7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,6 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
 
 declare i32 @llvm.amdgcn.ballot.i32(i1)
 declare i32 @llvm.ctpop.i32(i32)
@@ -8,6 +10,10 @@ declare i32 @llvm.ctpop.i32(i32)
 ; Test ballot(0)
 
 define amdgpu_cs i32 @constant_false() {
+; CHECK-BOTH-LABEL: constant_false:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    s_mov_b32 s0, 0
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: constant_false:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_mov_b32 s0, 0
@@ -19,10 +25,30 @@ define amdgpu_cs i32 @constant_false() {
 ; Test ballot(1)
 
 define amdgpu_cs i32 @constant_true() {
-; CHECK-LABEL: constant_true:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, exec_lo
-; CHECK-NEXT:    ; return to shader part epilog
+; CHECK-SDAG-LABEL: constant_true:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: constant_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_mov_b32 s0, exec_lo
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: constant_true:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: constant_true:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_and_b32 s0, exec_lo, exec_lo
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: constant_true:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
   ret i32 %ballot
 }
@@ -30,6 +56,11 @@ define amdgpu_cs i32 @constant_true() {
 ; Test ballot of a non-comparison operation
 
 define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-BOTH-LABEL: non_compare:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-BOTH-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: non_compare:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -43,6 +74,10 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
 ; Test ballot of comparisons
 
 define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-BOTH-LABEL: compare_ints:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_ints:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
@@ -53,16 +88,40 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
 }
 
 define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-LABEL: compare_int_with_constant:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-NEXT:    ; return to shader part epilog
+; CHECK-SDAG-LABEL: compare_int_with_constant:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: compare_int_with_constant:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: compare_int_with_constant:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_int_with_constant:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_int_with_constant:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
 }
 
 define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-BOTH-LABEL: compare_floats:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: compare_floats:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
@@ -73,6 +132,11 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-BOTH-LABEL: ctpop_of_ballot:
+; CHECK-BOTH:       ; %bb.0:
+; CHECK-BOTH-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; CHECK-BOTH-NEXT:    s_bcnt1_i32_b32 s0, vcc_lo
+; CHECK-BOTH-NEXT:    ; return to shader part epilog
 ; CHECK-LABEL: ctpop_of_ballot:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -85,18 +149,71 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB7_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB7_3
-; CHECK-NEXT:  .LBB7_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB7_3
-; CHECK-NEXT:  .LBB7_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB7_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB7_3
+; CHECK-SDAG-NEXT:  .LBB7_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB7_3
+; CHECK-SDAG-NEXT:  .LBB7_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB7_3
+; GFX10-NEXT:  .LBB7_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB7_3
+; GFX10-NEXT:  .LBB7_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB7_3
+; GFX11-NEXT:  .LBB7_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB7_3
+; GFX11-NEXT:  .LBB7_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB7_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB7_3
+; GFX10-GISEL-NEXT:  .LBB7_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB7_3
+; GFX10-GISEL-NEXT:  .LBB7_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB7_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB7_3
+; GFX11-GISEL-NEXT:  .LBB7_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB7_3
+; GFX11-GISEL-NEXT:  .LBB7_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -108,17 +225,67 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_bitcmp0_b32 s0, 0
-; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB8_3
-; CHECK-NEXT:  .LBB8_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB8_3
-; CHECK-NEXT:  .LBB8_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_bitcmp0_b32 s0, 0
+; CHECK-SDAG-NEXT:    s_cbranch_scc1 .LBB8_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB8_3
+; CHECK-SDAG-NEXT:  .LBB8_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB8_3
+; CHECK-SDAG-NEXT:  .LBB8_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX10-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB8_3
+; GFX10-NEXT:  .LBB8_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB8_3
+; GFX10-NEXT:  .LBB8_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX11-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB8_3
+; GFX11-NEXT:  .LBB8_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB8_3
+; GFX11-NEXT:  .LBB8_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB8_3
+; GFX10-GISEL-NEXT:  .LBB8_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB8_3
+; GFX10-GISEL-NEXT:  .LBB8_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_bitcmp0_b32 s0, 0
+; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB8_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB8_3
+; GFX11-GISEL-NEXT:  .LBB8_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB8_3
+; GFX11-GISEL-NEXT:  .LBB8_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -130,18 +297,71 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT:    s_cbranch_vccz .LBB9_2
-; CHECK-NEXT:  ; %bb.1: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB9_3
-; CHECK-NEXT:  .LBB9_2: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB9_3
-; CHECK-NEXT:  .LBB9_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT:    s_cbranch_vccz .LBB9_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB9_3
+; CHECK-SDAG-NEXT:  .LBB9_2: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB9_3
+; CHECK-SDAG-NEXT:  .LBB9_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX10-NEXT:  ; %bb.1: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB9_3
+; GFX10-NEXT:  .LBB9_2: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB9_3
+; GFX10-NEXT:  .LBB9_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX11-NEXT:  ; %bb.1: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB9_3
+; GFX11-NEXT:  .LBB9_2: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB9_3
+; GFX11-NEXT:  .LBB9_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc0 .LBB9_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB9_3
+; GFX10-GISEL-NEXT:  .LBB9_2: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB9_3
+; GFX10-GISEL-NEXT:  .LBB9_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT:    s_cbranch_vccz .LBB9_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB9_3
+; GFX11-GISEL-NEXT:  .LBB9_2: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB9_3
+; GFX11-GISEL-NEXT:  .LBB9_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -153,19 +373,76 @@ false:
 }
 
 define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_bitcmp1_b32 s0, 0
-; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT:    s_cbranch_vccnz .LBB10_2
-; CHECK-NEXT:  ; %bb.1: ; %true
-; CHECK-NEXT:    s_mov_b32 s0, 42
-; CHECK-NEXT:    s_branch .LBB10_3
-; CHECK-NEXT:  .LBB10_2: ; %false
-; CHECK-NEXT:    s_mov_b32 s0, 33
-; CHECK-NEXT:    s_branch .LBB10_3
-; CHECK-NEXT:  .LBB10_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s0, 0
+; CHECK-SDAG-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT:    s_cbranch_vccnz .LBB10_2
+; CHECK-SDAG-NEXT:  ; %bb.1: ; %true
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT:    s_branch .LBB10_3
+; CHECK-SDAG-NEXT:  .LBB10_2: ; %false
+; CHECK-SDAG-NEXT:    s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT:    s_branch .LBB10_3
+; CHECK-SDAG-NEXT:  .LBB10_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX10-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX10-NEXT:  ; %bb.1: ; %true
+; GFX10-NEXT:    s_mov_b32 s0, 42
+; GFX10-NEXT:    s_branch .LBB10_3
+; GFX10-NEXT:  .LBB10_2: ; %false
+; GFX10-NEXT:    s_mov_b32 s0, 33
+; GFX10-NEXT:    s_branch .LBB10_3
+; GFX10-NEXT:  .LBB10_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX11-NEXT:  ; %bb.1: ; %true
+; GFX11-NEXT:    s_mov_b32 s0, 42
+; GFX11-NEXT:    s_branch .LBB10_3
+; GFX11-NEXT:  .LBB10_2: ; %false
+; GFX11-NEXT:    s_mov_b32 s0, 33
+; GFX11-NEXT:    s_branch .LBB10_3
+; GFX11-NEXT:  .LBB10_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB10_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT:    s_branch .LBB10_3
+; GFX10-GISEL-NEXT:  .LBB10_2: ; %false
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT:    s_branch .LBB10_3
+; GFX10-GISEL-NEXT:  .LBB10_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX11-GISEL-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT:    s_cbranch_vccnz .LBB10_2
+; GFX11-GISEL-NEXT:  ; %bb.1: ; %true
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT:    s_branch .LBB10_3
+; GFX11-GISEL-NEXT:  .LBB10_2: ; %false
+; GFX11-GISEL-NEXT:    s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT:    s_branch .LBB10_3
+; GFX11-GISEL-NEXT:  .LBB10_3:
   %c = trunc i32 %v to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
   %ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -177,17 +454,66 @@ false:
 }
 
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_ze...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/172017


More information about the llvm-commits mailing list