[llvm] [AMDGPU][GlobalISel] Add register bank legalize rules for amdgcn_icmp, amdgcn_fcmp and amdgcn_ballot. (PR #172017)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 6 03:54:22 PST 2026
https://github.com/anjenner updated https://github.com/llvm/llvm-project/pull/172017
>From 4050d7d4e052fbf59d211f960a1379c371e26e47 Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 08:53:17 -0500
Subject: [PATCH 1/4] AMDGPU: Add register bank legalize rules for amdgcn_icmp,
amdgcn_fcmp and amdgcn_ballot.
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 30 +
.../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 1183 ++++++++++++++---
2 files changed, 1053 insertions(+), 160 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..05ba285bd1379 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1018,4 +1018,34 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
// readfirstlaning just in case register is not in sgpr.
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
+ addRulesForIOpcs({amdgcn_icmp})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+ addRulesForIOpcs({amdgcn_fcmp})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+
+ addRulesForIOpcs({amdgcn_ballot}, Standard)
+ .Uni(S64, {{Sgpr64}, {None, Vcc}})
+ .Uni(S32, {{Sgpr32}, {None, Vcc}});
+
} // end initialize rules
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index c1f3a12dba578..3b93164539df7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,6 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
declare i32 @llvm.amdgcn.ballot.i32(i1)
declare i32 @llvm.ctpop.i32(i32)
@@ -8,6 +10,10 @@ declare i32 @llvm.ctpop.i32(i32)
; Test ballot(0)
define amdgpu_cs i32 @constant_false() {
+; CHECK-BOTH-LABEL: constant_false:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 0
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: constant_false:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -19,10 +25,30 @@ define amdgpu_cs i32 @constant_false() {
; Test ballot(1)
define amdgpu_cs i32 @constant_true() {
-; CHECK-LABEL: constant_true:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s0, exec_lo
-; CHECK-NEXT: ; return to shader part epilog
+; CHECK-SDAG-LABEL: constant_true:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_mov_b32 s0, exec_lo
+; CHECK-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: constant_true:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s0, exec_lo
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: constant_true:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: constant_true:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_and_b32 s0, exec_lo, exec_lo
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: constant_true:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
ret i32 %ballot
}
@@ -30,6 +56,11 @@ define amdgpu_cs i32 @constant_true() {
; Test ballot of a non-comparison operation
define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-BOTH-LABEL: non_compare:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-BOTH-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
@@ -43,6 +74,10 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
; Test ballot of comparisons
define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-BOTH-LABEL: compare_ints:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_ints:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
@@ -53,16 +88,40 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
}
define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-LABEL: compare_int_with_constant:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-NEXT: ; return to shader part epilog
+; CHECK-SDAG-LABEL: compare_int_with_constant:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: compare_int_with_constant:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: compare_int_with_constant:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_int_with_constant:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_le_i32_e64 s0, 0x63, v0
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_int_with_constant:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%cmp = icmp sge i32 %x, 99
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
}
define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-BOTH-LABEL: compare_floats:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_floats:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
@@ -73,6 +132,11 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
}
define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-BOTH-LABEL: ctpop_of_ballot:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; CHECK-BOTH-NEXT: s_bcnt1_i32_b32 s0, vcc_lo
+; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -85,18 +149,71 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB7_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB7_3
-; CHECK-NEXT: .LBB7_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB7_3
-; CHECK-NEXT: .LBB7_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB7_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB7_3
+; CHECK-SDAG-NEXT: .LBB7_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB7_3
+; CHECK-SDAG-NEXT: .LBB7_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB7_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB7_3
+; GFX10-NEXT: .LBB7_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB7_3
+; GFX10-NEXT: .LBB7_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB7_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB7_3
+; GFX11-NEXT: .LBB7_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB7_3
+; GFX11-NEXT: .LBB7_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB7_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB7_3
+; GFX10-GISEL-NEXT: .LBB7_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB7_3
+; GFX10-GISEL-NEXT: .LBB7_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB7_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB7_3
+; GFX11-GISEL-NEXT: .LBB7_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB7_3
+; GFX11-GISEL-NEXT: .LBB7_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -108,17 +225,67 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_bitcmp0_b32 s0, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB8_3
-; CHECK-NEXT: .LBB8_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB8_3
-; CHECK-NEXT: .LBB8_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_bitcmp0_b32 s0, 0
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB8_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB8_3
+; CHECK-SDAG-NEXT: .LBB8_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB8_3
+; CHECK-SDAG-NEXT: .LBB8_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_bitcmp0_b32 s0, 0
+; GFX10-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB8_3
+; GFX10-NEXT: .LBB8_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB8_3
+; GFX10-NEXT: .LBB8_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_bitcmp0_b32 s0, 0
+; GFX11-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB8_3
+; GFX11-NEXT: .LBB8_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB8_3
+; GFX11-NEXT: .LBB8_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB8_3
+; GFX10-GISEL-NEXT: .LBB8_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB8_3
+; GFX10-GISEL-NEXT: .LBB8_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_bitcmp0_b32 s0, 0
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB8_3
+; GFX11-GISEL-NEXT: .LBB8_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB8_3
+; GFX11-GISEL-NEXT: .LBB8_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -130,18 +297,71 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB9_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB9_3
-; CHECK-NEXT: .LBB9_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB9_3
-; CHECK-NEXT: .LBB9_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB9_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB9_3
+; CHECK-SDAG-NEXT: .LBB9_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB9_3
+; CHECK-SDAG-NEXT: .LBB9_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB9_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB9_3
+; GFX10-NEXT: .LBB9_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB9_3
+; GFX10-NEXT: .LBB9_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB9_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB9_3
+; GFX11-NEXT: .LBB9_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB9_3
+; GFX11-NEXT: .LBB9_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB9_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB9_3
+; GFX10-GISEL-NEXT: .LBB9_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB9_3
+; GFX10-GISEL-NEXT: .LBB9_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB9_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB9_3
+; GFX11-GISEL-NEXT: .LBB9_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB9_3
+; GFX11-GISEL-NEXT: .LBB9_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -153,19 +373,76 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_bitcmp1_b32 s0, 0
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_bitcmp1_b32 s0, 0
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB10_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB10_3
+; CHECK-SDAG-NEXT: .LBB10_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB10_3
+; CHECK-SDAG-NEXT: .LBB10_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_bitcmp1_b32 s0, 0
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB10_3
+; GFX10-NEXT: .LBB10_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB10_3
+; GFX10-NEXT: .LBB10_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_bitcmp1_b32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB10_3
+; GFX11-NEXT: .LBB10_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB10_3
+; GFX11-NEXT: .LBB10_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB10_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB10_3
+; GFX10-GISEL-NEXT: .LBB10_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB10_3
+; GFX10-GISEL-NEXT: .LBB10_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_bitcmp1_b32 s0, 0
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB10_3
+; GFX11-GISEL-NEXT: .LBB10_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB10_3
+; GFX11-GISEL-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -177,17 +454,66 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB11_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB11_3
-; CHECK-NEXT: .LBB11_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB11_3
-; CHECK-NEXT: .LBB11_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB11_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB11_3
+; CHECK-SDAG-NEXT: .LBB11_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB11_3
+; CHECK-SDAG-NEXT: .LBB11_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB11_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB11_3
+; GFX10-NEXT: .LBB11_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB11_3
+; GFX10-NEXT: .LBB11_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB11_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB11_3
+; GFX11-NEXT: .LBB11_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB11_3
+; GFX11-NEXT: .LBB11_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB11_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB11_3
+; GFX10-GISEL-NEXT: .LBB11_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB11_3
+; GFX10-GISEL-NEXT: .LBB11_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB11_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB11_3
+; GFX11-GISEL-NEXT: .LBB11_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB11_3
+; GFX11-GISEL-NEXT: .LBB11_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -199,17 +525,65 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB12_3
-; CHECK-NEXT: .LBB12_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB12_3
-; CHECK-NEXT: .LBB12_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB12_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB12_3
+; CHECK-SDAG-NEXT: .LBB12_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB12_3
+; CHECK-SDAG-NEXT: .LBB12_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_gt_u32 s0, 11
+; GFX10-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB12_3
+; GFX10-NEXT: .LBB12_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB12_3
+; GFX10-NEXT: .LBB12_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB12_3
+; GFX11-NEXT: .LBB12_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB12_3
+; GFX11-NEXT: .LBB12_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB12_3
+; GFX10-GISEL-NEXT: .LBB12_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB12_3
+; GFX10-GISEL-NEXT: .LBB12_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB12_3
+; GFX11-GISEL-NEXT: .LBB12_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB12_3
+; GFX11-GISEL-NEXT: .LBB12_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -221,17 +595,66 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: s_cbranch_vccz .LBB13_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB13_3
-; CHECK-NEXT: .LBB13_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB13_3
-; CHECK-NEXT: .LBB13_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB13_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB13_3
+; CHECK-SDAG-NEXT: .LBB13_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB13_3
+; CHECK-SDAG-NEXT: .LBB13_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: s_cbranch_vccz .LBB13_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB13_3
+; GFX10-NEXT: .LBB13_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB13_3
+; GFX10-NEXT: .LBB13_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: s_cbranch_vccz .LBB13_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB13_3
+; GFX11-NEXT: .LBB13_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB13_3
+; GFX11-NEXT: .LBB13_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB13_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB13_3
+; GFX10-GISEL-NEXT: .LBB13_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB13_3
+; GFX10-GISEL-NEXT: .LBB13_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB13_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB13_3
+; GFX11-GISEL-NEXT: .LBB13_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB13_3
+; GFX11-GISEL-NEXT: .LBB13_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -243,6 +666,17 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
+; CHECK-BOTH-LABEL: branch_uniform_ballot_eq_zero_compare:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB14_3
+; CHECK-BOTH-NEXT: .LBB14_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB14_3
+; CHECK-BOTH-NEXT: .LBB14_3:
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -265,19 +699,76 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT: s_cbranch_vccz .LBB15_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB15_3
-; CHECK-NEXT: .LBB15_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB15_3
-; CHECK-NEXT: .LBB15_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB15_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB15_3
+; CHECK-SDAG-NEXT: .LBB15_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB15_3
+; CHECK-SDAG-NEXT: .LBB15_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT: s_cbranch_vccz .LBB15_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB15_3
+; GFX10-NEXT: .LBB15_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB15_3
+; GFX10-NEXT: .LBB15_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT: s_cbranch_vccz .LBB15_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB15_3
+; GFX11-NEXT: .LBB15_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB15_3
+; GFX11-NEXT: .LBB15_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB15_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB15_3
+; GFX10-GISEL-NEXT: .LBB15_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB15_3
+; GFX10-GISEL-NEXT: .LBB15_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB15_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB15_3
+; GFX11-GISEL-NEXT: .LBB15_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB15_3
+; GFX11-GISEL-NEXT: .LBB15_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -291,22 +782,90 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_cmp_lt_u32 s1, 35
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_or_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB16_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB16_3
-; CHECK-NEXT: .LBB16_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB16_3
-; CHECK-NEXT: .LBB16_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_cmp_lt_u32 s1, 35
+; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT: s_or_b32 s0, s0, s1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB16_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB16_3
+; CHECK-SDAG-NEXT: .LBB16_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB16_3
+; CHECK-SDAG-NEXT: .LBB16_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_gt_u32 s0, 11
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_cmp_lt_u32 s1, 35
+; GFX10-NEXT: s_cselect_b32 s1, -1, 0
+; GFX10-NEXT: s_or_b32 s0, s0, s1
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB16_3
+; GFX10-NEXT: .LBB16_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB16_3
+; GFX10-NEXT: .LBB16_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_cmp_lt_u32 s1, 35
+; GFX11-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-NEXT: s_or_b32 s0, s0, s1
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB16_3
+; GFX11-NEXT: .LBB16_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB16_3
+; GFX11-NEXT: .LBB16_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT: s_cmp_le_u32 s1, 34
+; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB16_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB16_3
+; GFX10-GISEL-NEXT: .LBB16_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB16_3
+; GFX10-GISEL-NEXT: .LBB16_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_cmp_lt_u32 s1, 35
+; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB16_3
+; GFX11-GISEL-NEXT: .LBB16_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB16_3
+; GFX11-GISEL-NEXT: .LBB16_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -320,19 +879,75 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-NEXT: s_cbranch_vccz .LBB17_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB17_3
-; CHECK-NEXT: .LBB17_2: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB17_3
-; CHECK-NEXT: .LBB17_3:
+; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB17_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB17_3
+; CHECK-SDAG-NEXT: .LBB17_2: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB17_3
+; CHECK-SDAG-NEXT: .LBB17_3:
+;
+; GFX10-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX10-NEXT: s_cbranch_vccz .LBB17_2
+; GFX10-NEXT: ; %bb.1: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB17_3
+; GFX10-NEXT: .LBB17_2: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB17_3
+; GFX10-NEXT: .LBB17_3:
+;
+; GFX11-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-NEXT: s_cbranch_vccz .LBB17_2
+; GFX11-NEXT: ; %bb.1: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB17_3
+; GFX11-NEXT: .LBB17_2: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB17_3
+; GFX11-NEXT: .LBB17_3:
+;
+; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB17_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB17_3
+; GFX10-GISEL-NEXT: .LBB17_2: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB17_3
+; GFX10-GISEL-NEXT: .LBB17_3:
+;
+; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB17_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB17_3
+; GFX11-GISEL-NEXT: .LBB17_2: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB17_3
+; GFX11-GISEL-NEXT: .LBB17_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -346,22 +961,90 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
-; CHECK-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-NEXT: s_cbranch_vccnz .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_and:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-SDAG-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-SDAG-NEXT: s_and_b32 s0, s0, s1
+; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB18_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB18_3
+; CHECK-SDAG-NEXT: .LBB18_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB18_3
+; CHECK-SDAG-NEXT: .LBB18_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-NEXT: s_cselect_b32 s0, -1, 0
+; GFX10-NEXT: s_cmp_gt_u32 s1, 34
+; GFX10-NEXT: s_cselect_b32 s1, -1, 0
+; GFX10-NEXT: s_and_b32 s0, s0, s1
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB18_3
+; GFX10-NEXT: .LBB18_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB18_3
+; GFX10-NEXT: .LBB18_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_cmp_lt_u32 s0, 12
+; GFX11-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-NEXT: s_cmp_gt_u32 s1, 34
+; GFX11-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-NEXT: s_and_b32 s0, s0, s1
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB18_3
+; GFX11-NEXT: .LBB18_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB18_3
+; GFX11-NEXT: .LBB18_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX10-GISEL-NEXT: s_cmp_gt_u32 s1, 34
+; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, s1
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB18_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB18_3
+; GFX10-GISEL-NEXT: .LBB18_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB18_3
+; GFX10-GISEL-NEXT: .LBB18_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-GISEL-NEXT: s_cmp_gt_u32 s1, 34
+; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
+; GFX11-GISEL-NEXT: s_and_b32 s0, s0, s1
+; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB18_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB18_3
+; GFX11-GISEL-NEXT: .LBB18_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB18_3
+; GFX11-GISEL-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -375,18 +1058,72 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
-; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; CHECK-NEXT: s_cmp_lt_i32 s0, 23
-; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
-; CHECK-NEXT: ; %bb.1: ; %true
-; CHECK-NEXT: s_mov_b32 s0, 42
-; CHECK-NEXT: s_branch .LBB19_3
-; CHECK-NEXT: .LBB19_2: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB19_3
-; CHECK-NEXT: .LBB19_3:
+; CHECK-SDAG-LABEL: branch_uniform_ballot_sgt_N_compare:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; CHECK-SDAG-NEXT: s_cmp_lt_i32 s0, 23
+; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB19_2
+; CHECK-SDAG-NEXT: ; %bb.1: ; %true
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
+; CHECK-SDAG-NEXT: s_branch .LBB19_3
+; CHECK-SDAG-NEXT: .LBB19_2: ; %false
+; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
+; CHECK-SDAG-NEXT: s_branch .LBB19_3
+; CHECK-SDAG-NEXT: .LBB19_3:
+;
+; GFX10-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX10-NEXT: s_cmp_lt_i32 s0, 23
+; GFX10-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX10-NEXT: ; %bb.1: ; %true
+; GFX10-NEXT: s_mov_b32 s0, 42
+; GFX10-NEXT: s_branch .LBB19_3
+; GFX10-NEXT: .LBB19_2: ; %false
+; GFX10-NEXT: s_mov_b32 s0, 33
+; GFX10-NEXT: s_branch .LBB19_3
+; GFX10-NEXT: .LBB19_3:
+;
+; GFX11-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-NEXT: s_cmp_lt_i32 s0, 23
+; GFX11-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX11-NEXT: ; %bb.1: ; %true
+; GFX11-NEXT: s_mov_b32 s0, 42
+; GFX11-NEXT: s_branch .LBB19_3
+; GFX11-NEXT: .LBB19_2: ; %false
+; GFX11-NEXT: s_mov_b32 s0, 33
+; GFX11-NEXT: s_branch .LBB19_3
+; GFX11-NEXT: .LBB19_3:
+;
+; GFX10-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: s_cmp_le_i32 s0, 22
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %true
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX10-GISEL-NEXT: s_branch .LBB19_3
+; GFX10-GISEL-NEXT: .LBB19_2: ; %false
+; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX10-GISEL-NEXT: s_branch .LBB19_3
+; GFX10-GISEL-NEXT: .LBB19_3:
+;
+; GFX11-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; GFX11-GISEL-NEXT: s_cmp_lt_i32 s0, 23
+; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
+; GFX11-GISEL-NEXT: ; %bb.1: ; %true
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
+; GFX11-GISEL-NEXT: s_branch .LBB19_3
+; GFX11-GISEL-NEXT: .LBB19_2: ; %false
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
+; GFX11-GISEL-NEXT: s_branch .LBB19_3
+; GFX11-GISEL-NEXT: .LBB19_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%bc = icmp sgt i32 %ballot, 22
@@ -400,6 +1137,19 @@ false:
declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB20_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB20_3
+; CHECK-BOTH-NEXT: .LBB20_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB20_3
+; CHECK-BOTH-NEXT: .LBB20_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -440,6 +1190,22 @@ define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 in
; s_cselect_b32 s0, s0, 0
; s_and_b32 s0, s0, exec_lo
; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec.
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB21_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB21_3
+; CHECK-BOTH-NEXT: .LBB21_2: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB21_3
+; CHECK-BOTH-NEXT: .LBB21_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -469,6 +1235,19 @@ false:
}
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
+; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB22_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB22_3
+; CHECK-BOTH-NEXT: .LBB22_2: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB22_3
+; CHECK-BOTH-NEXT: .LBB22_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -495,6 +1274,22 @@ false:
}
define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
+; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
+; CHECK-BOTH: ; %bb.0:
+; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
+; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
+; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB23_2
+; CHECK-BOTH-NEXT: ; %bb.1: ; %false
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
+; CHECK-BOTH-NEXT: s_branch .LBB23_3
+; CHECK-BOTH-NEXT: .LBB23_2: ; %true
+; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
+; CHECK-BOTH-NEXT: s_branch .LBB23_3
+; CHECK-BOTH-NEXT: .LBB23_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -573,6 +1368,58 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: non_cst_non_compare_input:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX10-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-GISEL-NEXT: ; %bb.1: ; %B
+; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr2
+; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT: ; %bb.2: ; %Flow
+; GFX10-GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
+; GFX10-GISEL-NEXT: ; %bb.3: ; %A
+; GFX10-GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
+; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX10-GISEL-NEXT: ; %bb.4: ; %exit
+; GFX10-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-GISEL-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: non_cst_non_compare_input:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX11-GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v3
+; GFX11-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX11-GISEL-NEXT: ; %bb.1: ; %B
+; GFX11-GISEL-NEXT: v_cmp_gt_u32_e64 s0, 2, v2
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr2
+; GFX11-GISEL-NEXT: ; %bb.2: ; %Flow
+; GFX11-GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
+; GFX11-GISEL-NEXT: ; %bb.3: ; %A
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-GISEL-NEXT: s_and_not1_b32 s0, s0, exec_lo
+; GFX11-GISEL-NEXT: s_and_b32 s2, vcc_lo, exec_lo
+; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s2
+; GFX11-GISEL-NEXT: ; %bb.4: ; %exit
+; GFX11-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-GISEL-NEXT: v_cmp_ne_u32_e64 s0, 0, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %A, label %B
@@ -608,6 +1455,22 @@ define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: compare_bfloats:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: compare_bfloats:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
+; GFX11-GISEL-NEXT: ; return to shader part epilog
%cmp = fcmp ogt bfloat %x, %y
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
>From 6eee23f1d1831a23d2fdb337b17aaea40c640e8d Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Fri, 12 Dec 2025 09:50:36 -0500
Subject: [PATCH 2/4] Reformat.
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 44 +++++++++----------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 05ba285bd1379..48a29320a998c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1019,33 +1019,29 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
addRulesForIOpcs({amdgcn_icmp})
- .Any({{UniS64, _, S1},
- {{Sgpr64}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
- .Any({{UniS32, _, S1},
- {{Sgpr32}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+ .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
addRulesForIOpcs({amdgcn_fcmp})
- .Any({{UniS64, _, S1},
- {{Sgpr64}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
-
- .Any({{UniS32, _, S1},
- {{Sgpr32}, {IntrId, Vcc, Vcc}}})
- .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
- .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
- .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
+ .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
+
+ .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
+ .Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
+ .Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
addRulesForIOpcs({amdgcn_ballot}, Standard)
- .Uni(S64, {{Sgpr64}, {None, Vcc}})
- .Uni(S32, {{Sgpr32}, {None, Vcc}});
+ .Uni(S64, {{Sgpr64}, {None, Vcc}})
+ .Uni(S32, {{Sgpr32}, {None, Vcc}});
} // end initialize rules
>From 5fffb40a7a050d49cf5a02665a86daefdd8871c0 Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Wed, 17 Dec 2025 05:33:23 -0500
Subject: [PATCH 3/4] Update tests.
---
.../GlobalISel/regbankselect-amdgcn.fcmp.mir | 4 +-
.../GlobalISel/regbankselect-amdgcn.icmp.mir | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 170 ++++++++++++------
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll | 15 +-
6 files changed, 122 insertions(+), 87 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
index be59d356af05e..00c2a6102de38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
---
name: fcmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
index 434cc138f3704..2ba4a3c6cf043 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
---
name: icmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 87a9ba30490a0..edda79b813378 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index 9e4824694e76a..67a973d3b0e07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
@@ -26,17 +26,29 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
-; GFX9-LABEL: v_fcmp_f32_oeq_with_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0|
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f32_oeq_with_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0|
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f32_oeq_with_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], v0, |s3|
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f32_oeq_with_fabs:
; VI-SDAG: ; %bb.0:
@@ -55,8 +67,8 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], v0, |s3|
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -82,17 +94,29 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
-; GFX9-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], |v0|, |s3|
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
; VI-SDAG: ; %bb.0:
@@ -111,8 +135,8 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], |v0|, |s3|
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1838,19 +1862,33 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
-; GFX9-LABEL: v_fcmp_f16_oeq_with_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_lshr_b32 s3, s2, 16
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0|
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f16_oeq_with_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_lshr_b32 s3, s2, 16
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0|
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_oeq_with_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s2, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], v0, |s3|
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_oeq_with_fabs:
; VI-SDAG: ; %bb.0:
@@ -1873,8 +1911,8 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: s_lshr_b32 s3, s2, 16
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], v0, |s3|
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1905,19 +1943,33 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
-; GFX9-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_lshr_b32 s3, s2, 16
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_lshr_b32 s3, s2, 16
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-SDAG-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s2, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], |v0|, |s3|
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
; VI-SDAG: ; %bb.0:
@@ -1940,8 +1992,8 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: s_lshr_b32 s3, s2, 16
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], |v0|, |s3|
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index e2b068e2e9105..f8ccd40d37bcc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -2,16 +2,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-
-; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
-; They are allowed in DAGISel but we (intentionally) don't support them
-; in GlobalISel.
-
-; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index 366b71bae75c9..e6278cf2a52ee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -3,18 +3,9 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=fiji < %s 2>%t | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s 2>%t | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
-; RUN: FileCheck --check-prefix=ERR %s < %t
-
-; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
-; They are allowed in DAGISel but we (intentionally) don't support them
-; in GlobalISel.
-
-; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
>From a5ec298611216f48c6230583293e8312212e151d Mon Sep 17 00:00:00 2001
From: Andrew Jenner <Andrew.Jenner at amd.com>
Date: Tue, 6 Jan 2026 07:03:49 -0500
Subject: [PATCH 4/4] Make various changes requested in review feedback.
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 15 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 16 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 1 +
.../GlobalISel/regbankselect-amdgcn.fcmp.mir | 3 +-
.../GlobalISel/regbankselect-amdgcn.icmp.mir | 3 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 1183 +++--------------
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll | 92 +-
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll | 148 ++-
10 files changed, 354 insertions(+), 1117 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index cc31d7d5c55ac..9f57b2cd94a7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -934,6 +934,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case Sgpr32Trunc:
case Sgpr32AExt:
case Sgpr32AExtBoolInReg:
+ case Vgpr32AExtBoolInReg:
case Sgpr32SExt:
case Sgpr32ZExt:
case UniInVgprS32:
@@ -1084,6 +1085,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr32Trunc:
case Sgpr32AExt:
case Sgpr32AExtBoolInReg:
+ case Vgpr32AExtBoolInReg:
case Sgpr32SExt:
case Sgpr32ZExt:
return SgprRB;
@@ -1380,6 +1382,19 @@ bool RegBankLegalizeHelper::applyMappingSrc(
Op.setReg(BoolInReg.getReg(0));
break;
}
+ case Vgpr32AExtBoolInReg: {
+ // Note: this ext allows S1, and it is meant to be combined away.
+ assert(Ty.getSizeInBits() == 1);
+ assert(RB == SgprRB);
+ auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
+ // Zext SgprS1 is not legal, make AND with 1 instead. This instruction is
+ // most of the time meant to be combined away in AMDGPURegBankCombiner.
+ auto Cst1 = B.buildConstant(SgprRB_S32, 1);
+ auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
+
+ Op.setReg(B.buildCopy(VgprRB_S32, BoolInReg).getReg(0));
+ break;
+ }
case Sgpr32SExt: {
assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
assert(RB == SgprRB);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 48a29320a998c..49ac8d208b461 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1019,29 +1019,29 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
addRulesForIOpcs({amdgcn_icmp})
- .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
.Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
.Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
- .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
.Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
.Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
addRulesForIOpcs({amdgcn_fcmp})
- .Any({{UniS64, _, S1}, {{Sgpr64}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS64, _, S1},
+ {{Sgpr64}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
.Any({{UniS64, _, S16}, {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
.Any({{UniS64, _, S32}, {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{UniS64, _, S64}, {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
- .Any({{UniS32, _, S1}, {{Sgpr32}, {IntrId, Vcc, Vcc}}})
+ .Any({{UniS32, _, S1},
+ {{Sgpr32}, {IntrId, Vgpr32AExtBoolInReg, Vgpr32AExtBoolInReg}}})
.Any({{UniS32, _, S16}, {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
.Any({{UniS32, _, S32}, {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{UniS32, _, S64}, {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
- addRulesForIOpcs({amdgcn_ballot}, Standard)
- .Uni(S64, {{Sgpr64}, {None, Vcc}})
- .Uni(S32, {{Sgpr32}, {None, Vcc}});
-
} // end initialize rules
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1ac117304b76f..67e9f510640c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -204,6 +204,7 @@ enum RegBankLLTMappingApplyID {
// Src only modifiers: extends
Sgpr32AExt,
Sgpr32AExtBoolInReg,
+ Vgpr32AExtBoolInReg,
Sgpr32SExt,
Sgpr32ZExt,
Vgpr32SExt,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
index 00c2a6102de38..685a712bbcd18 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
---
name: fcmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
index 2ba4a3c6cf043..5c181562954d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
---
name: icmp_ss
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 3b93164539df7..c1f3a12dba578 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -1,8 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --no-generate-body-for-unused-prefixes
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select -global-isel-abort=0 < %s | FileCheck -check-prefixes=CHECK,GFX10-GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 < %s | FileCheck -check-prefixes=CHECK,GFX11-GISEL %s
declare i32 @llvm.amdgcn.ballot.i32(i1)
declare i32 @llvm.ctpop.i32(i32)
@@ -10,10 +8,6 @@ declare i32 @llvm.ctpop.i32(i32)
; Test ballot(0)
define amdgpu_cs i32 @constant_false() {
-; CHECK-BOTH-LABEL: constant_false:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 0
-; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: constant_false:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -25,30 +19,10 @@ define amdgpu_cs i32 @constant_false() {
; Test ballot(1)
define amdgpu_cs i32 @constant_true() {
-; CHECK-SDAG-LABEL: constant_true:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_mov_b32 s0, exec_lo
-; CHECK-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: constant_true:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_mov_b32 s0, exec_lo
-; GFX10-NEXT: ; return to shader part epilog
-;
-; GFX11-LABEL: constant_true:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_mov_b32 s0, exec_lo
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: constant_true:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_and_b32 s0, exec_lo, exec_lo
-; GFX10-GISEL-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: constant_true:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo
-; GFX11-GISEL-NEXT: ; return to shader part epilog
+; CHECK-LABEL: constant_true:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s0, exec_lo
+; CHECK-NEXT: ; return to shader part epilog
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
ret i32 %ballot
}
@@ -56,11 +30,6 @@ define amdgpu_cs i32 @constant_true() {
; Test ballot of a non-comparison operation
define amdgpu_cs i32 @non_compare(i32 %x) {
-; CHECK-BOTH-LABEL: non_compare:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-BOTH-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
@@ -74,10 +43,6 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
; Test ballot of comparisons
define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
-; CHECK-BOTH-LABEL: compare_ints:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
-; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_ints:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
@@ -88,40 +53,16 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
}
define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
-; CHECK-SDAG-LABEL: compare_int_with_constant:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; CHECK-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: compare_int_with_constant:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX10-NEXT: ; return to shader part epilog
-;
-; GFX11-LABEL: compare_int_with_constant:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: compare_int_with_constant:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_cmp_le_i32_e64 s0, 0x63, v0
-; GFX10-GISEL-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: compare_int_with_constant:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
-; GFX11-GISEL-NEXT: ; return to shader part epilog
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-NEXT: ; return to shader part epilog
%cmp = icmp sge i32 %x, 99
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
}
define amdgpu_cs i32 @compare_floats(float %x, float %y) {
-; CHECK-BOTH-LABEL: compare_floats:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
-; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: compare_floats:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
@@ -132,11 +73,6 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
}
define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
-; CHECK-BOTH-LABEL: ctpop_of_ballot:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
-; CHECK-BOTH-NEXT: s_bcnt1_i32_b32 s0, vcc_lo
-; CHECK-BOTH-NEXT: ; return to shader part epilog
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
@@ -149,71 +85,18 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB7_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB7_3
-; CHECK-SDAG-NEXT: .LBB7_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB7_3
-; CHECK-SDAG-NEXT: .LBB7_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: s_cbranch_vccz .LBB7_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB7_3
-; GFX10-NEXT: .LBB7_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB7_3
-; GFX10-NEXT: .LBB7_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_cbranch_vccz .LBB7_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB7_3
-; GFX11-NEXT: .LBB7_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB7_3
-; GFX11-NEXT: .LBB7_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB7_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB7_3
-; GFX10-GISEL-NEXT: .LBB7_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB7_3
-; GFX10-GISEL-NEXT: .LBB7_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_non_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB7_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB7_3
-; GFX11-GISEL-NEXT: .LBB7_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB7_3
-; GFX11-GISEL-NEXT: .LBB7_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB7_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB7_3
+; CHECK-NEXT: .LBB7_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB7_3
+; CHECK-NEXT: .LBB7_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -225,67 +108,17 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_bitcmp0_b32 s0, 0
-; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB8_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB8_3
-; CHECK-SDAG-NEXT: .LBB8_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB8_3
-; CHECK-SDAG-NEXT: .LBB8_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bitcmp0_b32 s0, 0
-; GFX10-NEXT: s_cbranch_scc1 .LBB8_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB8_3
-; GFX10-NEXT: .LBB8_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB8_3
-; GFX10-NEXT: .LBB8_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bitcmp0_b32 s0, 0
-; GFX11-NEXT: s_cbranch_scc1 .LBB8_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB8_3
-; GFX11-NEXT: .LBB8_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB8_3
-; GFX11-NEXT: .LBB8_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB8_3
-; GFX10-GISEL-NEXT: .LBB8_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB8_3
-; GFX10-GISEL-NEXT: .LBB8_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_non_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_bitcmp0_b32 s0, 0
-; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB8_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB8_3
-; GFX11-GISEL-NEXT: .LBB8_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB8_3
-; GFX11-GISEL-NEXT: .LBB8_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_bitcmp0_b32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB8_3
+; CHECK-NEXT: .LBB8_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB8_3
+; CHECK-NEXT: .LBB8_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -297,71 +130,18 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB9_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB9_3
-; CHECK-SDAG-NEXT: .LBB9_2: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB9_3
-; CHECK-SDAG-NEXT: .LBB9_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: s_cbranch_vccz .LBB9_2
-; GFX10-NEXT: ; %bb.1: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB9_3
-; GFX10-NEXT: .LBB9_2: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB9_3
-; GFX10-NEXT: .LBB9_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_cbranch_vccz .LBB9_2
-; GFX11-NEXT: ; %bb.1: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB9_3
-; GFX11-NEXT: .LBB9_2: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB9_3
-; GFX11-NEXT: .LBB9_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB9_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB9_3
-; GFX10-GISEL-NEXT: .LBB9_2: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB9_3
-; GFX10-GISEL-NEXT: .LBB9_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_non_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB9_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB9_3
-; GFX11-GISEL-NEXT: .LBB9_2: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB9_3
-; GFX11-GISEL-NEXT: .LBB9_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB9_2
+; CHECK-NEXT: ; %bb.1: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB9_3
+; CHECK-NEXT: .LBB9_2: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB9_3
+; CHECK-NEXT: .LBB9_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -373,76 +153,19 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_bitcmp1_b32 s0, 0
-; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB10_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB10_3
-; CHECK-SDAG-NEXT: .LBB10_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB10_3
-; CHECK-SDAG-NEXT: .LBB10_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bitcmp1_b32 s0, 0
-; GFX10-NEXT: s_cselect_b32 s0, -1, 0
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_vccnz .LBB10_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB10_3
-; GFX10-NEXT: .LBB10_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB10_3
-; GFX10-NEXT: .LBB10_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bitcmp1_b32 s0, 0
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT: s_cbranch_vccnz .LBB10_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB10_3
-; GFX11-NEXT: .LBB10_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB10_3
-; GFX11-NEXT: .LBB10_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB10_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB10_3
-; GFX10-GISEL-NEXT: .LBB10_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB10_3
-; GFX10-GISEL-NEXT: .LBB10_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_non_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_bitcmp1_b32 s0, 0
-; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB10_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB10_3
-; GFX11-GISEL-NEXT: .LBB10_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB10_3
-; GFX11-GISEL-NEXT: .LBB10_3:
+; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_bitcmp1_b32 s0, 0
+; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -454,66 +177,17 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB11_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB11_3
-; CHECK-SDAG-NEXT: .LBB11_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB11_3
-; CHECK-SDAG-NEXT: .LBB11_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT: s_cbranch_vccz .LBB11_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB11_3
-; GFX10-NEXT: .LBB11_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB11_3
-; GFX10-NEXT: .LBB11_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: s_cbranch_vccz .LBB11_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB11_3
-; GFX11-NEXT: .LBB11_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB11_3
-; GFX11-NEXT: .LBB11_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT: s_cmp_eq_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB11_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB11_3
-; GFX10-GISEL-NEXT: .LBB11_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB11_3
-; GFX10-GISEL-NEXT: .LBB11_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB11_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB11_3
-; GFX11-GISEL-NEXT: .LBB11_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB11_3
-; GFX11-GISEL-NEXT: .LBB11_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB11_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB11_3
+; CHECK-NEXT: .LBB11_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB11_3
+; CHECK-NEXT: .LBB11_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -525,65 +199,17 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB12_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB12_3
-; CHECK-SDAG-NEXT: .LBB12_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB12_3
-; CHECK-SDAG-NEXT: .LBB12_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_cmp_gt_u32 s0, 11
-; GFX10-NEXT: s_cbranch_scc1 .LBB12_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB12_3
-; GFX10-NEXT: .LBB12_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB12_3
-; GFX10-NEXT: .LBB12_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_cmp_gt_u32 s0, 11
-; GFX11-NEXT: s_cbranch_scc1 .LBB12_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB12_3
-; GFX11-NEXT: .LBB12_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB12_3
-; GFX11-NEXT: .LBB12_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB12_3
-; GFX10-GISEL-NEXT: .LBB12_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB12_3
-; GFX10-GISEL-NEXT: .LBB12_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
-; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB12_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB12_3
-; GFX11-GISEL-NEXT: .LBB12_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB12_3
-; GFX11-GISEL-NEXT: .LBB12_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB12_3
+; CHECK-NEXT: .LBB12_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB12_3
+; CHECK-NEXT: .LBB12_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -595,66 +221,17 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB13_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB13_3
-; CHECK-SDAG-NEXT: .LBB13_2: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB13_3
-; CHECK-SDAG-NEXT: .LBB13_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT: s_cbranch_vccz .LBB13_2
-; GFX10-NEXT: ; %bb.1: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB13_3
-; GFX10-NEXT: .LBB13_2: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB13_3
-; GFX10-NEXT: .LBB13_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: s_cbranch_vccz .LBB13_2
-; GFX11-NEXT: ; %bb.1: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB13_3
-; GFX11-NEXT: .LBB13_2: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB13_3
-; GFX11-NEXT: .LBB13_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 vcc_lo, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB13_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB13_3
-; GFX10-GISEL-NEXT: .LBB13_2: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB13_3
-; GFX10-GISEL-NEXT: .LBB13_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB13_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB13_3
-; GFX11-GISEL-NEXT: .LBB13_2: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB13_3
-; GFX11-GISEL-NEXT: .LBB13_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB13_2
+; CHECK-NEXT: ; %bb.1: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB13_3
+; CHECK-NEXT: .LBB13_2: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB13_3
+; CHECK-NEXT: .LBB13_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -666,17 +243,6 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
-; CHECK-BOTH-LABEL: branch_uniform_ballot_eq_zero_compare:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB14_2
-; CHECK-BOTH-NEXT: ; %bb.1: ; %true
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT: s_branch .LBB14_3
-; CHECK-BOTH-NEXT: .LBB14_2: ; %false
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT: s_branch .LBB14_3
-; CHECK-BOTH-NEXT: .LBB14_3:
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -699,76 +265,19 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_ne_zero_and:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB15_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB15_3
-; CHECK-SDAG-NEXT: .LBB15_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB15_3
-; CHECK-SDAG-NEXT: .LBB15_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX10-NEXT: s_cbranch_vccz .LBB15_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB15_3
-; GFX10-NEXT: .LBB15_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB15_3
-; GFX10-NEXT: .LBB15_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-NEXT: s_cbranch_vccz .LBB15_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB15_3
-; GFX11-NEXT: .LBB15_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB15_3
-; GFX11-NEXT: .LBB15_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
-; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB15_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB15_3
-; GFX10-GISEL-NEXT: .LBB15_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB15_3
-; GFX10-GISEL-NEXT: .LBB15_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_ne_zero_and:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB15_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB15_3
-; GFX11-GISEL-NEXT: .LBB15_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB15_3
-; GFX11-GISEL-NEXT: .LBB15_3:
+; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT: s_cbranch_vccz .LBB15_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB15_3
+; CHECK-NEXT: .LBB15_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB15_3
+; CHECK-NEXT: .LBB15_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -782,90 +291,22 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_ne_zero_and:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_cmp_gt_u32 s0, 11
-; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT: s_cmp_lt_u32 s1, 35
-; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-SDAG-NEXT: s_or_b32 s0, s0, s1
-; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB16_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB16_3
-; CHECK-SDAG-NEXT: .LBB16_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB16_3
-; CHECK-SDAG-NEXT: .LBB16_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_cmp_gt_u32 s0, 11
-; GFX10-NEXT: s_cselect_b32 s0, -1, 0
-; GFX10-NEXT: s_cmp_lt_u32 s1, 35
-; GFX10-NEXT: s_cselect_b32 s1, -1, 0
-; GFX10-NEXT: s_or_b32 s0, s0, s1
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_vccnz .LBB16_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB16_3
-; GFX10-NEXT: .LBB16_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB16_3
-; GFX10-NEXT: .LBB16_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_cmp_gt_u32 s0, 11
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: s_cmp_lt_u32 s1, 35
-; GFX11-NEXT: s_cselect_b32 s1, -1, 0
-; GFX11-NEXT: s_or_b32 s0, s0, s1
-; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT: s_cbranch_vccnz .LBB16_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB16_3
-; GFX11-NEXT: .LBB16_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB16_3
-; GFX11-NEXT: .LBB16_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_cmp_ge_u32 s0, 12
-; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
-; GFX10-GISEL-NEXT: s_cmp_le_u32 s1, 34
-; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
-; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s1
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB16_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB16_3
-; GFX10-GISEL-NEXT: .LBB16_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB16_3
-; GFX10-GISEL-NEXT: .LBB16_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_ne_zero_and:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_cmp_gt_u32 s0, 11
-; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT: s_cmp_lt_u32 s1, 35
-; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
-; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s1
-; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB16_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB16_3
-; GFX11-GISEL-NEXT: .LBB16_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB16_3
-; GFX11-GISEL-NEXT: .LBB16_3:
+; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cmp_gt_u32 s0, 11
+; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_cmp_lt_u32 s1, 35
+; CHECK-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB16_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB16_3
+; CHECK-NEXT: .LBB16_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB16_3
+; CHECK-NEXT: .LBB16_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -879,75 +320,19 @@ false:
}
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-SDAG-LABEL: branch_divergent_ballot_eq_zero_and:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-SDAG-NEXT: s_cbranch_vccz .LBB17_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB17_3
-; CHECK-SDAG-NEXT: .LBB17_2: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB17_3
-; CHECK-SDAG-NEXT: .LBB17_3:
-;
-; GFX10-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX10-NEXT: s_cbranch_vccz .LBB17_2
-; GFX10-NEXT: ; %bb.1: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB17_3
-; GFX10-NEXT: .LBB17_2: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB17_3
-; GFX10-NEXT: .LBB17_3:
-;
-; GFX11-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-NEXT: s_cbranch_vccz .LBB17_2
-; GFX11-NEXT: ; %bb.1: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB17_3
-; GFX11-NEXT: .LBB17_2: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB17_3
-; GFX11-NEXT: .LBB17_3:
-;
-; GFX10-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX10-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX10-GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
-; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB17_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB17_3
-; GFX10-GISEL-NEXT: .LBB17_2: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB17_3
-; GFX10-GISEL-NEXT: .LBB17_3:
-;
-; GFX11-GISEL-LABEL: branch_divergent_ballot_eq_zero_and:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; GFX11-GISEL-NEXT: s_cbranch_vccz .LBB17_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB17_3
-; GFX11-GISEL-NEXT: .LBB17_2: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB17_3
-; GFX11-GISEL-NEXT: .LBB17_3:
+; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT: s_cbranch_vccz .LBB17_2
+; CHECK-NEXT: ; %bb.1: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB17_3
+; CHECK-NEXT: .LBB17_2: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB17_3
+; CHECK-NEXT: .LBB17_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -961,90 +346,22 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_eq_zero_and:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-SDAG-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-SDAG-NEXT: s_cmp_gt_u32 s1, 34
-; CHECK-SDAG-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-SDAG-NEXT: s_and_b32 s0, s0, s1
-; CHECK-SDAG-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; CHECK-SDAG-NEXT: s_cbranch_vccnz .LBB18_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB18_3
-; CHECK-SDAG-NEXT: .LBB18_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB18_3
-; CHECK-SDAG-NEXT: .LBB18_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_cmp_lt_u32 s0, 12
-; GFX10-NEXT: s_cselect_b32 s0, -1, 0
-; GFX10-NEXT: s_cmp_gt_u32 s1, 34
-; GFX10-NEXT: s_cselect_b32 s1, -1, 0
-; GFX10-NEXT: s_and_b32 s0, s0, s1
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_vccnz .LBB18_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB18_3
-; GFX10-NEXT: .LBB18_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB18_3
-; GFX10-NEXT: .LBB18_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_cmp_lt_u32 s0, 12
-; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: s_cmp_gt_u32 s1, 34
-; GFX11-NEXT: s_cselect_b32 s1, -1, 0
-; GFX11-NEXT: s_and_b32 s0, s0, s1
-; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-NEXT: s_cbranch_vccnz .LBB18_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB18_3
-; GFX11-NEXT: .LBB18_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB18_3
-; GFX11-NEXT: .LBB18_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
-; GFX10-GISEL-NEXT: s_cselect_b32 s0, 1, 0
-; GFX10-GISEL-NEXT: s_cmp_gt_u32 s1, 34
-; GFX10-GISEL-NEXT: s_cselect_b32 s1, 1, 0
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, s1
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB18_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB18_3
-; GFX10-GISEL-NEXT: .LBB18_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB18_3
-; GFX10-GISEL-NEXT: .LBB18_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_eq_zero_and:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_cmp_lt_u32 s0, 12
-; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-GISEL-NEXT: s_cmp_gt_u32 s1, 34
-; GFX11-GISEL-NEXT: s_cselect_b32 s1, -1, 0
-; GFX11-GISEL-NEXT: s_and_b32 s0, s0, s1
-; GFX11-GISEL-NEXT: s_and_b32 vcc_lo, exec_lo, s0
-; GFX11-GISEL-NEXT: s_cbranch_vccnz .LBB18_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB18_3
-; GFX11-GISEL-NEXT: .LBB18_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB18_3
-; GFX11-GISEL-NEXT: .LBB18_3:
+; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-NEXT: s_and_b32 s0, s0, s1
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; CHECK-NEXT: s_cbranch_vccnz .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
%c = and i1 %v1c, %v2c
@@ -1058,72 +375,18 @@ false:
}
define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
-; CHECK-SDAG-LABEL: branch_uniform_ballot_sgt_N_compare:
-; CHECK-SDAG: ; %bb.0:
-; CHECK-SDAG-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; CHECK-SDAG-NEXT: s_cmp_lt_i32 s0, 23
-; CHECK-SDAG-NEXT: s_cbranch_scc1 .LBB19_2
-; CHECK-SDAG-NEXT: ; %bb.1: ; %true
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 42
-; CHECK-SDAG-NEXT: s_branch .LBB19_3
-; CHECK-SDAG-NEXT: .LBB19_2: ; %false
-; CHECK-SDAG-NEXT: s_mov_b32 s0, 33
-; CHECK-SDAG-NEXT: s_branch .LBB19_3
-; CHECK-SDAG-NEXT: .LBB19_3:
-;
-; GFX10-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; GFX10-NEXT: s_cmp_lt_i32 s0, 23
-; GFX10-NEXT: s_cbranch_scc1 .LBB19_2
-; GFX10-NEXT: ; %bb.1: ; %true
-; GFX10-NEXT: s_mov_b32 s0, 42
-; GFX10-NEXT: s_branch .LBB19_3
-; GFX10-NEXT: .LBB19_2: ; %false
-; GFX10-NEXT: s_mov_b32 s0, 33
-; GFX10-NEXT: s_branch .LBB19_3
-; GFX10-NEXT: .LBB19_3:
-;
-; GFX11-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; GFX11-NEXT: s_cmp_lt_i32 s0, 23
-; GFX11-NEXT: s_cbranch_scc1 .LBB19_2
-; GFX11-NEXT: ; %bb.1: ; %true
-; GFX11-NEXT: s_mov_b32 s0, 42
-; GFX11-NEXT: s_branch .LBB19_3
-; GFX11-NEXT: .LBB19_2: ; %false
-; GFX11-NEXT: s_mov_b32 s0, 33
-; GFX11-NEXT: s_branch .LBB19_3
-; GFX11-NEXT: .LBB19_3:
-;
-; GFX10-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_cmp_lt_u32 s0, 12
-; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT: s_cmp_le_i32 s0, 22
-; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
-; GFX10-GISEL-NEXT: ; %bb.1: ; %true
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX10-GISEL-NEXT: s_branch .LBB19_3
-; GFX10-GISEL-NEXT: .LBB19_2: ; %false
-; GFX10-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX10-GISEL-NEXT: s_branch .LBB19_3
-; GFX10-GISEL-NEXT: .LBB19_3:
-;
-; GFX11-GISEL-LABEL: branch_uniform_ballot_sgt_N_compare:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
-; GFX11-GISEL-NEXT: s_cmp_lt_i32 s0, 23
-; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB19_2
-; GFX11-GISEL-NEXT: ; %bb.1: ; %true
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 42
-; GFX11-GISEL-NEXT: s_branch .LBB19_3
-; GFX11-GISEL-NEXT: .LBB19_2: ; %false
-; GFX11-GISEL-NEXT: s_mov_b32 s0, 33
-; GFX11-GISEL-NEXT: s_branch .LBB19_3
-; GFX11-GISEL-NEXT: .LBB19_3:
+; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
+; CHECK-NEXT: s_cmp_lt_i32 s0, 23
+; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
+; CHECK-NEXT: ; %bb.1: ; %true
+; CHECK-NEXT: s_mov_b32 s0, 42
+; CHECK-NEXT: s_branch .LBB19_3
+; CHECK-NEXT: .LBB19_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB19_3
+; CHECK-NEXT: .LBB19_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
%bc = icmp sgt i32 %ballot, 22
@@ -1137,19 +400,6 @@ false:
declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
-; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB20_2
-; CHECK-BOTH-NEXT: ; %bb.1: ; %true
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT: s_branch .LBB20_3
-; CHECK-BOTH-NEXT: .LBB20_2: ; %false
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT: s_branch .LBB20_3
-; CHECK-BOTH-NEXT: .LBB20_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -1190,22 +440,6 @@ define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 in
; s_cselect_b32 s0, s0, 0
; s_and_b32 s0, s0, exec_lo
; By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec.
-; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
-; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
-; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
-; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB21_2
-; CHECK-BOTH-NEXT: ; %bb.1: ; %true
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT: s_branch .LBB21_3
-; CHECK-BOTH-NEXT: .LBB21_2: ; %false
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT: s_branch .LBB21_3
-; CHECK-BOTH-NEXT: .LBB21_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -1235,19 +469,6 @@ false:
}
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
-; CHECK-BOTH-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-BOTH-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-BOTH-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
-; CHECK-BOTH-NEXT: s_cbranch_vccnz .LBB22_2
-; CHECK-BOTH-NEXT: ; %bb.1: ; %false
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT: s_branch .LBB22_3
-; CHECK-BOTH-NEXT: .LBB22_2: ; %true
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT: s_branch .LBB22_3
-; CHECK-BOTH-NEXT: .LBB22_3:
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
@@ -1274,22 +495,6 @@ false:
}
define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
-; CHECK-BOTH-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
-; CHECK-BOTH: ; %bb.0:
-; CHECK-BOTH-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-BOTH-NEXT: s_cselect_b32 s0, -1, 0
-; CHECK-BOTH-NEXT: s_cmp_gt_u32 s1, 34
-; CHECK-BOTH-NEXT: s_cselect_b32 s1, -1, 0
-; CHECK-BOTH-NEXT: s_and_b32 s0, s0, s1
-; CHECK-BOTH-NEXT: s_and_b32 s0, s0, exec_lo
-; CHECK-BOTH-NEXT: s_cbranch_scc1 .LBB23_2
-; CHECK-BOTH-NEXT: ; %bb.1: ; %false
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 33
-; CHECK-BOTH-NEXT: s_branch .LBB23_3
-; CHECK-BOTH-NEXT: .LBB23_2: ; %true
-; CHECK-BOTH-NEXT: s_mov_b32 s0, 42
-; CHECK-BOTH-NEXT: s_branch .LBB23_3
-; CHECK-BOTH-NEXT: .LBB23_3:
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
@@ -1368,58 +573,6 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: non_cst_non_compare_input:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, 1
-; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
-; GFX10-GISEL-NEXT: s_cmp_lg_u32 s0, 0
-; GFX10-GISEL-NEXT: s_cselect_b32 s0, exec_lo, 0
-; GFX10-GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
-; GFX10-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX10-GISEL-NEXT: ; %bb.1: ; %B
-; GFX10-GISEL-NEXT: v_cmp_gt_u32_e32 vcc_lo, 2, v2
-; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT: ; implicit-def: $vgpr2
-; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
-; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
-; GFX10-GISEL-NEXT: ; %bb.2: ; %Flow
-; GFX10-GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX10-GISEL-NEXT: ; %bb.3: ; %A
-; GFX10-GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 1, v2
-; GFX10-GISEL-NEXT: s_andn2_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT: s_and_b32 s2, exec_lo, vcc_lo
-; GFX10-GISEL-NEXT: s_or_b32 s0, s0, s2
-; GFX10-GISEL-NEXT: ; %bb.4: ; %exit
-; GFX10-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-GISEL-NEXT: s_and_b32 s0, s0, exec_lo
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX10-GISEL-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-GISEL-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: non_cst_non_compare_input:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v3
-; GFX11-GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX11-GISEL-NEXT: ; %bb.1: ; %B
-; GFX11-GISEL-NEXT: v_cmp_gt_u32_e64 s0, 2, v2
-; GFX11-GISEL-NEXT: ; implicit-def: $vgpr2
-; GFX11-GISEL-NEXT: ; %bb.2: ; %Flow
-; GFX11-GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX11-GISEL-NEXT: ; %bb.3: ; %A
-; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-GISEL-NEXT: s_and_not1_b32 s0, s0, exec_lo
-; GFX11-GISEL-NEXT: s_and_b32 s2, vcc_lo, exec_lo
-; GFX11-GISEL-NEXT: s_or_b32 s0, s0, s2
-; GFX11-GISEL-NEXT: ; %bb.4: ; %exit
-; GFX11-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11-GISEL-NEXT: v_cmp_ne_u32_e64 s0, 0, v2
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-GISEL-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %A, label %B
@@ -1455,22 +608,6 @@ define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX10-GISEL-LABEL: compare_bfloats:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX10-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
-; GFX10-GISEL-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-LABEL: compare_bfloats:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.l, 0
-; GFX11-GISEL-NEXT: v_mov_b16_e32 v2.h, v1.l
-; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l
-; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX11-GISEL-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
-; GFX11-GISEL-NEXT: ; return to shader part epilog
%cmp = fcmp ogt bfloat %x, %y
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
ret i32 %ballot
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index edda79b813378..678ee5d9e95c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=SDAG-GFX10 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index 67a973d3b0e07..80d77c27b3ec2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-SDAG %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI-GISEL %s
declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index f8ccd40d37bcc..7f79e60857d96 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
@@ -1609,33 +1609,65 @@ define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) {
}
define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) {
-; GFX11-LABEL: v_icmp_i1_ne0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_cmp_gt_u32 s2, 1
-; GFX11-NEXT: s_cselect_b32 s2, -1, 0
-; GFX11-NEXT: s_cmp_gt_u32 s3, 2
-; GFX11-NEXT: s_cselect_b32 s3, -1, 0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_and_b32 s2, s2, s3
-; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
-;
-; GFX10-LABEL: v_icmp_i1_ne0:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_cmp_gt_u32 s2, 1
-; GFX10-NEXT: s_cselect_b32 s2, -1, 0
-; GFX10-NEXT: s_cmp_gt_u32 s3, 2
-; GFX10-NEXT: s_cselect_b32 s3, -1, 0
-; GFX10-NEXT: s_and_b32 s2, s2, s3
-; GFX10-NEXT: v_mov_b32_e32 v1, s2
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; SDAG-GFX11-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; SDAG-GFX11-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; SDAG-GFX11-NEXT: s_cselect_b32 s3, -1, 0
+; SDAG-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG-GFX11-NEXT: s_and_b32 s2, s2, s3
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: s_cmp_gt_u32 s2, 1
+; SDAG-GFX10-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-GFX10-NEXT: s_cmp_gt_u32 s3, 2
+; SDAG-GFX10-NEXT: s_cselect_b32 s3, -1, 0
+; SDAG-GFX10-NEXT: s_and_b32 s2, s2, s3
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; GISEL-GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; GISEL-GFX11-NEXT: s_cselect_b32 s3, 1, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT: s_and_b32 s2, s2, s3
+; GISEL-GFX11-NEXT: v_cmp_ne_u32_e64 s2, s2, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: s_cmp_gt_u32 s2, 1
+; GISEL-GFX10-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-GFX10-NEXT: s_cmp_gt_u32 s3, 2
+; GISEL-GFX10-NEXT: s_cselect_b32 s3, 1, 0
+; GISEL-GFX10-NEXT: s_and_b32 s2, s2, s3
+; GISEL-GFX10-NEXT: v_cmp_ne_u32_e64 s2, s2, 0
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
%c0 = icmp ugt i32 %a, 1
%c1 = icmp ugt i32 %b, 2
%src = and i1 %c0, %c1
@@ -1670,3 +1702,5 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
attributes #0 = { nounwind readnone convergent }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; GFX10: {{.*}}
+; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index e6278cf2a52ee..aca943eb6f839 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -3,9 +3,9 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -global-isel-abort=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
@@ -1877,52 +1877,103 @@ define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) {
}
define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) {
-; GFX11-LABEL: v_icmp_i1_ne0:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_cmp_gt_u32 s2, 1
-; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX11-NEXT: s_cmp_gt_u32 s3, 2
-; GFX11-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX11-NEXT: v_mov_b32_e32 v0, s2
-; GFX11-NEXT: v_mov_b32_e32 v1, s3
-; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-NEXT: s_endpgm
+; SDAG-GFX11-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX11-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; SDAG-GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SDAG-GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; SDAG-GFX11-NEXT: s_cselect_b64 s[2:3], -1, 0
+; SDAG-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG-GFX11-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-GFX11-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; SDAG-GFX11-NEXT: s_endpgm
;
-; VI-LABEL: v_icmp_i1_ne0:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_cmp_gt_u32 s2, 1
-; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT: s_cmp_gt_u32 s3, 2
-; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
-;
-; GFX9-LABEL: v_icmp_i1_ne0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_cmp_gt_u32 s2, 1
-; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX9-NEXT: s_cmp_gt_u32 s3, 2
-; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9-NEXT: s_endpgm
+; SDAG-VI-LABEL: v_icmp_i1_ne0:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: s_cmp_gt_u32 s2, 1
+; SDAG-VI-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SDAG-VI-NEXT: s_cmp_gt_u32 s3, 2
+; SDAG-VI-NEXT: s_cselect_b64 s[2:3], -1, 0
+; SDAG-VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i1_ne0:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: s_cmp_gt_u32 s2, 1
+; SDAG-GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SDAG-GFX9-NEXT: s_cmp_gt_u32 s3, 2
+; SDAG-GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
+; SDAG-GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; GISEL-GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; GISEL-GFX11-NEXT: s_cselect_b32 s3, 1, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT: s_and_b32 s2, s2, s3
+; GISEL-GFX11-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i1_ne0:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: s_cmp_gt_u32 s2, 1
+; GISEL-VI-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-VI-NEXT: s_cmp_gt_u32 s3, 2
+; GISEL-VI-NEXT: s_cselect_b32 s3, 1, 0
+; GISEL-VI-NEXT: s_and_b32 s2, s2, s3
+; GISEL-VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i1_ne0:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: s_cmp_gt_u32 s2, 1
+; GISEL-GFX9-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-GFX9-NEXT: s_cmp_gt_u32 s3, 2
+; GISEL-GFX9-NEXT: s_cselect_b32 s3, 1, 0
+; GISEL-GFX9-NEXT: s_and_b32 s2, s2, s3
+; GISEL-GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, 0
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
%c0 = icmp ugt i32 %a, 1
%c1 = icmp ugt i32 %b, 2
%src = and i1 %c0, %c1
@@ -1966,3 +2017,4 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
attributes #0 = { nounwind readnone convergent }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; VI: {{.*}}
More information about the llvm-commits
mailing list