[llvm] AMDGPU: Add register bank legalize rules for amdgcn_icmp, amdgcn_fcmp and amdgcn_ballot. (PR #172017)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 06:51:53 PST 2025
https://github.com/anjenner created https://github.com/llvm/llvm-project/pull/172017
I'm not entirely sure this is correct. I needed to change the DstOpMapping for the S1 rules for amdgcn_fcmp and amdgcn_icmp from {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg} to {IntrId, Vcc, Vcc}. The result does seem to work (the tests pass). There are some differences in the generated assembly between GFX10 and GFX11 for gisel which are not there for sdag, but these don't seem to be related to this patch, and may be expected.
>From 4050d7d4e052fbf59d211f960a1379c371e26e47 Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 08:53:17 -0500
Subject: [PATCH 1/2] AMDGPU: Add register bank legalize rules for amdgcn_icmp,
amdgcn_fcmp and amdgcn_ballot.
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 30 +
.../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 1183 ++++++++++++++---
2 files changed, 1053 insertions(+), 160 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..05ba285bd1379 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1018,4 +1018,34 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
// readfirstlaning just in case register is not in sgpr.
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
+ addRulesForIOpcs({amdgcn_icmp})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+ addRulesForIOpcs({amdgcn_fcmp})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+ addRulesForIOpcs({amdgcn_ballot}, Standard)
+ .Uni(S64, {{Sgpr64}, {None, Vcc}})
+ .Uni(S32, {{Sgpr32}, {None, Vcc}});
+
} // end initialize rules
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index c1f3a12dba578..3b93164539df7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,6 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
declare i32 @llvm.amdgcn.ballot.i32(i1)
declare i32 @llvm.ctpop.i32(i32)
@@ -8,6 +10,10 @@ declare i32 @llvm.ctpop.i32(i32)
; Test ballot(0)
define amdgpu_cs i32 @constant_false() {
+; CHECK-BOTH-LABEL: constant_false:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 0
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: constant_false:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -19,10 +25,30 @@ define amdgpu_cs i32 @constant_false() {
; Test ballot(1)
define amdgpu_cs i32 @constant_true() {
-; CHECK-LABEL: constant_true:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s0, exec_lo
-; CHECK-NEXT: ; return to shader part epilog
+; CHECK-SDAG-LABEL: constant_true:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_mov_b32 s0, exec_lo
+; CHECK-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: constant_true:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s0, exec_lo
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: constant_true:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: constant_true:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_and_b32 s0, exec_lo, exec_lo
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: constant_true:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
ret i32 %ballot
}
@@ -30,6 +56,11 @@ define amdgpu_cs i32 @constant_true() {
; Test ballot of a non-comparison operation
define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-BOTH-LABEL: non_compare:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-BOTH-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
@@ -43,6 +74,10 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
; Test ballot of comparisons
define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-BOTH-LABEL: compare_ints:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_ints:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
@@ -53,16 +88,40 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
}
define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-LABEL: compare_int_with_constant:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-NEXT: ; return to shader part epilog
+; CHECK-SDAG-LABEL: compare_int_with_constant:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: compare_int_with_constant:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: compare_int_with_constant:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_int_with_constant:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_le_i32_e64 s0, 0x63, v0
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_int_with_constant:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%cmp = icmp sge i32 %x, 99
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
}
define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-BOTH-LABEL: compare_floats:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_floats:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
@@ -73,6 +132,11 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
}
define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-BOTH-LABEL: ctpop_of_ballot:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; CHECK-BOTH-NEXT: s_bcnt1_i32_b32 s0, vcc_lo
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -85,18 +149,71 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB7_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB7_3
-; CHECK-NEXT: .LBB7_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB7_3
-; CHECK-NEXT: .LBB7_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB7_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB7_3
+; CHECK-SDAG-NEXT: .LBB7_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB7_3
+; CHECK-SDAG-NEXT: .LBB7_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB7_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB7_3
+; GFX10-NEXT: .LBB7_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB7_3
+; GFX10-NEXT: .LBB7_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB7_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB7_3
+; GFX11-NEXT: .LBB7_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB7_3
+; GFX11-NEXT: .LBB7_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB7_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB7_3
+; GFX10-GISEL-NEXT: .LBB7_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB7_3
+; GFX10-GISEL-NEXT: .LBB7_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB7_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB7_3
+; GFX11-GISEL-NEXT: .LBB7_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB7_3
+; GFX11-GISEL-NEXT: .LBB7_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -108,17 +225,67 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_bitcmp0_b32 s0, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB8_3
-; CHECK-NEXT: .LBB8_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB8_3
-; CHECK-NEXT: .LBB8_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_bitcmp0_b32 s0, 0
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB8_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB8_3
+; CHECK-SDAG-NEXT: .LBB8_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB8_3
+; CHECK-SDAG-NEXT: .LBB8_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_bitcmp0_b32 s0, 0
+; GFX10-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB8_3
+; GFX10-NEXT: .LBB8_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB8_3
+; GFX10-NEXT: .LBB8_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_bitcmp0_b32 s0, 0
+; GFX11-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB8_3
+; GFX11-NEXT: .LBB8_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB8_3
+; GFX11-NEXT: .LBB8_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB8_3
+; GFX10-GISEL-NEXT: .LBB8_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB8_3
+; GFX10-GISEL-NEXT: .LBB8_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_bitcmp0_b32 s0, 0
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB8_3
+; GFX11-GISEL-NEXT: .LBB8_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB8_3
+; GFX11-GISEL-NEXT: .LBB8_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -130,18 +297,71 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB9_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB9_3
-; CHECK-NEXT: .LBB9_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB9_3
-; CHECK-NEXT: .LBB9_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB9_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB9_3
+; CHECK-SDAG-NEXT: .LBB9_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB9_3
+; CHECK-SDAG-NEXT: .LBB9_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB9_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB9_3
+; GFX10-NEXT: .LBB9_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB9_3
+; GFX10-NEXT: .LBB9_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB9_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB9_3
+; GFX11-NEXT: .LBB9_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB9_3
+; GFX11-NEXT: .LBB9_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB9_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB9_3
+; GFX10-GISEL-NEXT: .LBB9_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB9_3
+; GFX10-GISEL-NEXT: .LBB9_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB9_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB9_3
+; GFX11-GISEL-NEXT: .LBB9_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB9_3
+; GFX11-GISEL-NEXT: .LBB9_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -153,19 +373,76 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_bitcmp1_b32 s0, 0
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_bitcmp1_b32 s0, 0
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB10_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB10_3
+; CHECK-SDAG-NEXT: .LBB10_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB10_3
+; CHECK-SDAG-NEXT: .LBB10_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_bitcmp1_b32 s0, 0
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB10_3
+; GFX10-NEXT: .LBB10_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB10_3
+; GFX10-NEXT: .LBB10_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_bitcmp1_b32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB10_3
+; GFX11-NEXT: .LBB10_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB10_3
+; GFX11-NEXT: .LBB10_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB10_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB10_3
+; GFX10-GISEL-NEXT: .LBB10_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB10_3
+; GFX10-GISEL-NEXT: .LBB10_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_bitcmp1_b32 s0, 0
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB10_3
+; GFX11-GISEL-NEXT: .LBB10_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB10_3
+; GFX11-GISEL-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -177,17 +454,66 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB11_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB11_3
-; CHECK-NEXT: .LBB11_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB11_3
-; CHECK-NEXT: .LBB11_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB11_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB11_3
+; CHECK-SDAG-NEXT: .LBB11_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB11_3
+; CHECK-SDAG-NEXT: .LBB11_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB11_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB11_3
+; GFX10-NEXT: .LBB11_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB11_3
+; GFX10-NEXT: .LBB11_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB11_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB11_3
+; GFX11-NEXT: .LBB11_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB11_3
+; GFX11-NEXT: .LBB11_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB11_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB11_3
+; GFX10-GISEL-NEXT: .LBB11_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB11_3
+; GFX10-GISEL-NEXT: .LBB11_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB11_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB11_3
+; GFX11-GISEL-NEXT: .LBB11_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB11_3
+; GFX11-GISEL-NEXT: .LBB11_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -199,17 +525,65 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB12_3
-; CHECK-NEXT: .LBB12_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB12_3
-; CHECK-NEXT: .LBB12_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB12_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB12_3
+; CHECK-SDAG-NEXT: .LBB12_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB12_3
+; CHECK-SDAG-NEXT: .LBB12_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_gt_u32 s0, 11
+; GFX10-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB12_3
+; GFX10-NEXT: .LBB12_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB12_3
+; GFX10-NEXT: .LBB12_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB12_3
+; GFX11-NEXT: .LBB12_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB12_3
+; GFX11-NEXT: .LBB12_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB12_3
+; GFX10-GISEL-NEXT: .LBB12_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB12_3
+; GFX10-GISEL-NEXT: .LBB12_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB12_3
+; GFX11-GISEL-NEXT: .LBB12_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB12_3
+; GFX11-GISEL-NEXT: .LBB12_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -221,17 +595,66 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB13_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB13_3
-; CHECK-NEXT: .LBB13_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB13_3
-; CHECK-NEXT: .LBB13_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB13_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB13_3
+; CHECK-SDAG-NEXT: .LBB13_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB13_3
+; CHECK-SDAG-NEXT: .LBB13_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB13_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB13_3
+; GFX10-NEXT: .LBB13_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB13_3
+; GFX10-NEXT: .LBB13_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB13_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB13_3
+; GFX11-NEXT: .LBB13_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB13_3
+; GFX11-NEXT: .LBB13_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB13_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB13_3
+; GFX10-GISEL-NEXT: .LBB13_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB13_3
+; GFX10-GISEL-NEXT: .LBB13_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB13_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB13_3
+; GFX11-GISEL-NEXT: .LBB13_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB13_3
+; GFX11-GISEL-NEXT: .LBB13_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -243,6 +666,17 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
+; CHECK-BOTH-LABEL: branch_uniform_ballot_eq_zero_compare:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB14_3
+; CHECK-BOTH-NEXT: .LBB14_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB14_3
+; CHECK-BOTH-NEXT: .LBB14_3:
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -265,19 +699,76 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT: s_cbranch_vccz .LBB15_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB15_3
-; CHECK-NEXT: .LBB15_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB15_3
-; CHECK-NEXT: .LBB15_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB15_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB15_3
+; CHECK-SDAG-NEXT: .LBB15_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB15_3
+; CHECK-SDAG-NEXT: .LBB15_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT: s_cbranch_vccz .LBB15_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB15_3
+; GFX10-NEXT: .LBB15_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB15_3
+; GFX10-NEXT: .LBB15_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT: s_cbranch_vccz .LBB15_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB15_3
+; GFX11-NEXT: .LBB15_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB15_3
+; GFX11-NEXT: .LBB15_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB15_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB15_3
+; GFX10-GISEL-NEXT: .LBB15_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB15_3
+; GFX10-GISEL-NEXT: .LBB15_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB15_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB15_3
+; GFX11-GISEL-NEXT: .LBB15_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB15_3
+; GFX11-GISEL-NEXT: .LBB15_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -291,22 +782,90 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_cmp_lt_u32 s1, 35
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_or_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB16_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB16_3
-; CHECK-NEXT: .LBB16_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB16_3
-; CHECK-NEXT: .LBB16_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_cmp_lt_u32 s1, 35
+; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT: s_or_b32 s0, s0, s1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB16_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB16_3
+; CHECK-SDAG-NEXT: .LBB16_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB16_3
+; CHECK-SDAG-NEXT: .LBB16_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_gt_u32 s0, 11
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_cmp_lt_u32 s1, 35
+; GFX10-NEXT: s_cselect_b32 s1, -1, 0
+; GFX10-NEXT: s_or_b32 s0, s0, s1
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB16_3
+; GFX10-NEXT: .LBB16_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB16_3
+; GFX10-NEXT: .LBB16_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_cmp_lt_u32 s1, 35
+; GFX11-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-NEXT: s_or_b32 s0, s0, s1
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB16_3
+; GFX11-NEXT: .LBB16_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB16_3
+; GFX11-NEXT: .LBB16_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT: s_cmp_le_u32 s1, 34
+; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB16_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB16_3
+; GFX10-GISEL-NEXT: .LBB16_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB16_3
+; GFX10-GISEL-NEXT: .LBB16_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_cmp_lt_u32 s1, 35
+; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB16_3
+; GFX11-GISEL-NEXT: .LBB16_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB16_3
+; GFX11-GISEL-NEXT: .LBB16_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -320,19 +879,75 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT: s_cbranch_vccz .LBB17_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB17_3
-; CHECK-NEXT: .LBB17_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB17_3
-; CHECK-NEXT: .LBB17_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB17_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB17_3
+; CHECK-SDAG-NEXT: .LBB17_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB17_3
+; CHECK-SDAG-NEXT: .LBB17_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT: s_cbranch_vccz .LBB17_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB17_3
+; GFX10-NEXT: .LBB17_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB17_3
+; GFX10-NEXT: .LBB17_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT: s_cbranch_vccz .LBB17_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB17_3
+; GFX11-NEXT: .LBB17_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB17_3
+; GFX11-NEXT: .LBB17_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB17_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB17_3
+; GFX10-GISEL-NEXT: .LBB17_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB17_3
+; GFX10-GISEL-NEXT: .LBB17_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB17_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB17_3
+; GFX11-GISEL-NEXT: .LBB17_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB17_3
+; GFX11-GISEL-NEXT: .LBB17_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -346,22 +961,90 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT: s_and_b32 s0, s0, s1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB18_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB18_3
+; CHECK-SDAG-NEXT: .LBB18_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB18_3
+; CHECK-SDAG-NEXT: .LBB18_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_cmp_gt_u32 s1, 34
+; GFX10-NEXT: s_cselect_b32 s1, -1, 0
+; GFX10-NEXT: s_and_b32 s0, s0, s1
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB18_3
+; GFX10-NEXT: .LBB18_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB18_3
+; GFX10-NEXT: .LBB18_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_lt_u32 s0, 12
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_cmp_gt_u32 s1, 34
+; GFX11-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-NEXT: s_and_b32 s0, s0, s1
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB18_3
+; GFX11-NEXT: .LBB18_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB18_3
+; GFX11-NEXT: .LBB18_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT: s_cmp_gt_u32 s1, 34
+; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, s1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB18_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB18_3
+; GFX10-GISEL-NEXT: .LBB18_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB18_3
+; GFX10-GISEL-NEXT: .LBB18_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s1, 34
+; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT: s_and_b32 s0, s0, s1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB18_3
+; GFX11-GISEL-NEXT: .LBB18_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB18_3
+; GFX11-GISEL-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -375,18 +1058,72 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; CHECK-NEXT: s_cmp_lt_i32 s0, 23
-; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB19_3
-; CHECK-NEXT: .LBB19_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB19_3
-; CHECK-NEXT: .LBB19_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_sgt_N_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; CHECK-SDAG-NEXT: s_cmp_lt_i32 s0, 23
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB19_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB19_3
+; CHECK-SDAG-NEXT: .LBB19_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB19_3
+; CHECK-SDAG-NEXT: .LBB19_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX10-NEXT: s_cmp_lt_i32 s0, 23
+; GFX10-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB19_3
+; GFX10-NEXT: .LBB19_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB19_3
+; GFX10-NEXT: .LBB19_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-NEXT: s_cmp_lt_i32 s0, 23
+; GFX11-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB19_3
+; GFX11-NEXT: .LBB19_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB19_3
+; GFX11-NEXT: .LBB19_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: s_cmp_le_i32 s0, 22
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB19_3
+; GFX10-GISEL-NEXT: .LBB19_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB19_3
+; GFX10-GISEL-NEXT: .LBB19_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-GISEL-NEXT: s_cmp_lt_i32 s0, 23
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB19_3
+; GFX11-GISEL-NEXT: .LBB19_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB19_3
+; GFX11-GISEL-NEXT: .LBB19_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%bc = icmp sgt i32 %ballot, 22
@@ -400,6 +1137,19 @@ false:
declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB20_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB20_3
+; CHECK-BOTH-NEXT: .LBB20_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB20_3
+; CHECK-BOTH-NEXT: .LBB20_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -440,6 +1190,22 @@ define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 in
; s_cselect_b32 s0, s0, 0
; s_and_b32 s0, s0, exec_lo
; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec.
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB21_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB21_3
+; CHECK-BOTH-NEXT: .LBB21_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB21_3
+; CHECK-BOTH-NEXT: .LBB21_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -469,6 +1235,19 @@ false:
}
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB22_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB22_3
+; CHECK-BOTH-NEXT: .LBB22_2: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB22_3
+; CHECK-BOTH-NEXT: .LBB22_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -495,6 +1274,22 @@ false:
}
define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB23_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB23_3
+; CHECK-BOTH-NEXT: .LBB23_2: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB23_3
+; CHECK-BOTH-NEXT: .LBB23_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -573,6 +1368,58 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: non_cst_non_compare_input:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX10-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-GISEL-NEXT: ; %bb.1: ; %B
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr2
+; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT: ; %bb.2: ; %Flow
+; GFX10-GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
+; GFX10-GISEL-NEXT: ; %bb.3: ; %A
+; GFX10-GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT: ; %bb.4: ; %exit
+; GFX10-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-GISEL-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: non_cst_non_compare_input:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX11-GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v3
+; GFX11-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX11-GISEL-NEXT: ; %bb.1: ; %B
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e64 s0, 2, v2
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr2
+; GFX11-GISEL-NEXT: ; %bb.2: ; %Flow
+; GFX11-GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
+; GFX11-GISEL-NEXT: ; %bb.3: ; %A
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-GISEL-NEXT: s_and_not1_b32 s0, s0, exec_lo
+; GFX11-GISEL-NEXT: s_and_b32 s2, vcc_lo, exec_lo
+; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX11-GISEL-NEXT: ; %bb.4: ; %exit
+; GFX11-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e64 s0, 0, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %A, label %B
@@ -608,6 +1455,22 @@ define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_bfloats:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_bfloats:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%cmp = fcmp ogt bfloat %x, %y
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
>From 6eee23f1d1831a23d2fdb337b17aaea40c640e8d Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 09:50:36 -0500
Subject: [PATCH 2/2] Reformat.
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 44 +++++++++----------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 05ba285bd1379..48a29320a998c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1019,33 +1019,29 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
addRulesForIOpcs({amdgcn_icmp})
- .Any({{UniS64, _, S1},
- {{Sgpr64}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
- .Any({{UniS32, _, S1},
- {{Sgpr32}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+ .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
addRulesForIOpcs({amdgcn_fcmp})
- .Any({{UniS64, _, S1},
- {{Sgpr64}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
- .Any({{UniS32, _, S1},
- {{Sgpr32}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+ .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
addRulesForIOpcs({amdgcn_ballot}, Standard)
- .Uni(S64, {{Sgpr64}, {None, Vcc}})
- .Uni(S32, {{Sgpr32}, {None, Vcc}});
+ .Uni(S64, {{Sgpr64}, {None, Vcc}})
+ .Uni(S32, {{Sgpr32}, {None, Vcc}});
} // end initialize rules
More information about the llvm-commits
mailing list