[llvm] [AMDGPU] Add regbankselect rules for G_ICMP/G_FCMP (PR #172048)
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 13 12:55:07 PST 2025
https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/172048
>From 41062f95c33b1498f1256d735140d6b94c5d2a31 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Fri, 12 Dec 2025 01:04:35 -0600
Subject: [PATCH] [AMDGPU] Add regbankselect rules for G_ICMP/G_FCMP
Add regbankselect rules to legalize G_ICMP and G_FCMP with S16, S32, and S64 operands, covering both uniform and divergent cases.
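
For illustration, a minimal IR sketch of the shapes these rules handle (hypothetical function names; the added fcmp.ll/icmp.ll tests exercise the same patterns): a uniform f16 compare takes the scalar path (s_cmp_eq_f16 on subtargets with SALU float instructions), while the divergent form uses v_cmp_eq_f16 with the result in VCC.

  ; Illustrative only; see the new fcmp.ll test for the checked output.
  define i1 @fcmp_f16_uniform_example(half inreg %a, half inreg %b) {
    %r = fcmp oeq half %a, %b    ; uniform operands -> scalar compare on GFX11+
    ret i1 %r
  }

  define i1 @fcmp_f16_divergent_example(half %a, half %b) {
    %r = fcmp oeq half %a, %b    ; divergent operands -> v_cmp, result in VCC
    ret i1 %r
  }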
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 16 ++-
llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll | 102 ++++++++++++++++++
llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll | 101 +++++++++++++++++
3 files changed, 217 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..eacba21cbc82f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -561,13 +561,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
addRulesForGOpcs({G_ICMP})
+ .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr32AExt, Sgpr32AExt}}})
+ .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
.Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
.Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
+ .Any({{UniS1, _, S64}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}})
.Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
addRulesForGOpcs({G_FCMP})
- .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
- .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});
+ // S16: Use scalar on GFX11+, vector otherwise
+ .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}}, ST->hasSALUFloatInsts())
+ .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}}, !ST->hasSALUFloatInsts())
+ .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
+ // S32: Use scalar on GFX11+, vector otherwise
+ .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}}, ST->hasSALUFloatInsts())
+ .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}}, !ST->hasSALUFloatInsts())
+ .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
+ // S64: No scalar f64 compare exists; always use vector
+ .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
+ .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
addRulesForGOpcs({G_BRCOND})
.Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
new file mode 100644
index 0000000000000..3ef8627f8b2a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -new-reg-bank-select < %s | FileCheck %s
+
+define i1 @fcmp_f16_uniform(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fcmp_f16_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_cmp_eq_f16 s0, s1
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq half %a, %b
+ ret i1 %result
+}
+
+define i1 @fcmp_f16_divergent(half %a, half %b) {
+; CHECK-LABEL: fcmp_f16_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_f16_e32 vcc_lo, v0, v1
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq half %a, %b
+ ret i1 %result
+}
+
+define i1 @fcmp_f32_uniform(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fcmp_f32_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_cmp_eq_f32 s0, s1
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq float %a, %b
+ ret i1 %result
+}
+
+define i1 @fcmp_f32_divergent(float %a, float %b) {
+; CHECK-LABEL: fcmp_f32_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq float %a, %b
+ ret i1 %result
+}
+
+define i1 @fcmp_f64_uniform(double inreg %a, double inreg %b) {
+; CHECK-LABEL: fcmp_f64_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_f64_e64 s0, s[0:1], s[2:3]
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq double %a, %b
+ ret i1 %result
+}
+
+define i1 @fcmp_f64_divergent(double %a, double %b) {
+; CHECK-LABEL: fcmp_f64_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[2:3]
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = fcmp oeq double %a, %b
+ ret i1 %result
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
new file mode 100644
index 0000000000000..e6f9877a43c7f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -new-reg-bank-select < %s | FileCheck %s
+
+define i1 @icmp_i16_uniform(i16 inreg %a, i16 inreg %b) {
+; CHECK-LABEL: icmp_i16_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_cmp_eq_u32 s0, s1
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i16 %a, %b
+ ret i1 %result
+}
+
+define i1 @icmp_i16_divergent(i16 %a, i16 %b) {
+; CHECK-LABEL: icmp_i16_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, v0, v1
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i16 %a, %b
+ ret i1 %result
+}
+
+define i1 @icmp_i32_uniform(i32 inreg %a, i32 inreg %b) {
+; CHECK-LABEL: icmp_i32_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_cmp_eq_u32 s0, s1
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i32 %a, %b
+ ret i1 %result
+}
+
+define i1 @icmp_i32_divergent(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i32_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i32 %a, %b
+ ret i1 %result
+}
+
+define i1 @icmp_i64_uniform(i64 inreg %a, i64 inreg %b) {
+; CHECK-LABEL: icmp_i64_uniform:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_cmp_eq_u64 s[0:1], s[2:3]
+; CHECK-NEXT: s_cselect_b32 s0, 1, 0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i64 %a, %b
+ ret i1 %result
+}
+
+define i1 @icmp_i64_divergent(i64 %a, i64 %b) {
+; CHECK-LABEL: icmp_i64_divergent:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
+; CHECK-NEXT: s_wait_alu depctr_va_vcc(0)
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = icmp eq i64 %a, %b
+ ret i1 %result
+}