[llvm] [AMDGPU][SDAG] Support source modifiers as integer on select (PR #147325)
Chris Jackson via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 09:13:05 PDT 2025
https://github.com/chrisjbris updated https://github.com/llvm/llvm-project/pull/147325
>From 470be6180f971bc55d5b0384d6178a55ced366b7 Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Wed, 2 Jul 2025 04:39:04 -0500
Subject: [PATCH 1/6] Add new test for source modifiers on select
---
.../AMDGPU/integer-select-source-modifiers.ll | 68 +++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
diff --git a/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
new file mode 100644
index 0000000000000..6e7ff16b74139
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+
+define i32 @fneg_select_i32(i32 %cond, i32 %a, i32 %b) {
+ %neg.a = xor i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define <2 x i32> @fneg_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+ %neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
+ ret <2 x i32> %select
+}
+
+define i32 @fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
+ %neg.a = and i32 %a, u0x7fffffff
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define <2 x i32> @fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+ %neg.a = and <2 x i32> %a, splat (i32 u0x7fffffff)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
+ ret <2 x i32> %select
+}
+
+define i32 @fneg_fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
+ %neg.a = or i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define <2 x i32> @fneg_fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+ %neg.a = or <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
+ ret <2 x i32> %select
+}
+
+define i64 @fneg_select_i64(i64 %cond, i64 %a, i64 %b) {
+ %neg.a = xor i64 %a, u0x8000000000000000
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %neg.a, i64 %b
+ ret i64 %select
+}
+
+define i64 @fabs_select_i64(i64 %cond, i64 %a, i64 %b) {
+ %neg.a = and i64 %a, u0x7fffffffffffffff
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %neg.a, i64 %b
+ ret i64 %select
+}
+
+define i64 @fneg_fabs_select_i64(i64 %cond, i64 %a, i64 %b) {
+ %neg.a = or i64 %a, u0x8000000000000000
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %neg.a, i64 %b
+ ret i64 %select
+}
>From c1c30eae41afc94ec16207158741eaaac7e9edd5 Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Wed, 2 Jul 2025 04:40:36 -0500
Subject: [PATCH 2/6] Populate check-lines before patching
---
.../AMDGPU/integer-select-source-modifiers.ll | 170 ++++++++++++++++++
1 file changed, 170 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
index 6e7ff16b74139..dd6cf9bc6c592 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
@@ -5,6 +5,22 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define i32 @fneg_select_i32(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_select_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = xor i32 %a, u0x80000000
%cmp = icmp eq i32 %cond, zeroinitializer
%select = select i1 %cmp, i32 %neg.a, i32 %b
@@ -12,6 +28,28 @@ define i32 @fneg_select_i32(i32 %cond, i32 %a, i32 %b) {
}
define <2 x i32> @fneg_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_select_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
%select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
@@ -19,6 +57,22 @@ define <2 x i32> @fneg_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b)
}
define i32 @fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fabs_select_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fabs_select_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = and i32 %a, u0x7fffffff
%cmp = icmp eq i32 %cond, zeroinitializer
%select = select i1 %cmp, i32 %neg.a, i32 %b
@@ -26,6 +80,28 @@ define i32 @fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
}
define <2 x i32> @fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fabs_select_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fabs_select_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
+; GFX11-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = and <2 x i32> %a, splat (i32 u0x7fffffff)
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
%select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
@@ -33,6 +109,22 @@ define <2 x i32> @fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b)
}
define i32 @fneg_fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_fabs_select_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = or i32 %a, u0x80000000
%cmp = icmp eq i32 %cond, zeroinitializer
%select = select i1 %cmp, i32 %neg.a, i32 %b
@@ -40,6 +132,28 @@ define i32 @fneg_fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
}
define <2 x i32> @fneg_fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_fabs_select_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_or_b32_e32 v2, 0x80000000, v2
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_or_b32_e32 v3, 0x80000000, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_or_b32_e32 v2, 0x80000000, v2
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_or_b32_e32 v3, 0x80000000, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = or <2 x i32> %a, splat (i32 u0x80000000)
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
%select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
@@ -47,6 +161,23 @@ define <2 x i32> @fneg_fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32
}
define i64 @fneg_select_i64(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fneg_select_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = xor i64 %a, u0x8000000000000000
%cmp = icmp eq i64 %cond, zeroinitializer
%select = select i1 %cmp, i64 %neg.a, i64 %b
@@ -54,6 +185,23 @@ define i64 @fneg_select_i64(i64 %cond, i64 %a, i64 %b) {
}
define i64 @fabs_select_i64(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fabs_select_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fabs_select_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_and_b32 v1, 0x7fffffff, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = and i64 %a, u0x7fffffffffffffff
%cmp = icmp eq i64 %cond, zeroinitializer
%select = select i1 %cmp, i64 %neg.a, i64 %b
@@ -61,8 +209,30 @@ define i64 @fabs_select_i64(i64 %cond, i64 %a, i64 %b) {
}
define i64 @fneg_fabs_select_i64(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fneg_fabs_select_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_or_b32_e32 v3, 0x80000000, v3
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = or i64 %a, u0x8000000000000000
%cmp = icmp eq i64 %cond, zeroinitializer
%select = select i1 %cmp, i64 %neg.a, i64 %b
ret i64 %select
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11-FAKE16: {{.*}}
+; GFX11-TRUE16: {{.*}}
+; GFX7: {{.*}}
+; GFX9: {{.*}}
>From 85032f2284d2d619b71af6c365b053e1fafba98e Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Mon, 7 Jul 2025 10:12:45 -0500
Subject: [PATCH 3/6] [AMDGPU][SDAG] Support source modifiers as integer on
select
Extend the DAGCombine() for select to directly support fneg and fabs
for i32, v2i32 and i64.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 77 ++++++++++++++++++-
.../AMDGPU/integer-select-source-modifiers.ll | 40 +++-------
2 files changed, 86 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e32accaba85fc..bec86877e1705 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4830,6 +4830,64 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+static EVT IntToFloatVT(EVT VT) {
+ return VT = VT.isVector() ? MVT::getVectorVT(MVT::getFloatingPointVT(
+ VT.getScalarSizeInBits()),
+ VT.getVectorNumElements())
+ : MVT::getFloatingPointVT(VT.getFixedSizeInBits());
+}
+
+static SDValue BitwiseToSrcModifierOp(SDValue N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ unsigned Opc = N.getNode()->getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::XOR && Opc != ISD::AND)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N.getNode()->getOperand(0);
+ SDValue RHS = N.getNode()->getOperand(1);
+ ConstantSDNode *CRHS = isConstOrConstSplat(RHS);
+
+ if (!CRHS)
+ return SDValue();
+
+ EVT VT = RHS.getValueType();
+
+ assert((VT == MVT::i32 || VT == MVT::v2i32 || VT == MVT::i64) &&
+ "Expected i32, v2i32 or i64 value type.");
+
+ uint64_t Mask = 0;
+ if (VT.isVector()) {
+ SDValue Splat = DAG.getSplatValue(RHS);
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat);
+ Mask = C->getZExtValue();
+ } else
+ Mask = CRHS->getZExtValue();
+
+ EVT FVT = IntToFloatVT(VT);
+ SDValue BC = DAG.getNode(ISD::BITCAST, SDLoc(N), FVT, LHS);
+
+ switch (Opc) {
+ case ISD::XOR:
+ if (Mask == 0x80000000u || Mask == 0x8000000000000000u)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
+ return SDValue();
+ case ISD::OR:
+ if (Mask == 0x80000000u || Mask == 0x8000000000000000u) {
+ SDValue Abs = DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
+ return DAG.getNode(ISD::FABS, SDLoc(N), FVT, Abs);
+ }
+ return SDValue();
+ case ISD::AND:
+ if (Mask == 0x7fffffffu || Mask == 0x7fffffffffffffffu)
+ return DAG.getNode(ISD::FABS, SDLoc(N), FVT, BC);
+ return SDValue();
+ default:
+ return SDValue();
+ }
+}
+
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
@@ -4864,12 +4922,25 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
}
if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
- SDValue MinMax
- = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+ SDValue MinMax =
+ combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
// Revisit this node so we can catch min3/max3/med3 patterns.
- //DCI.AddToWorklist(MinMax.getNode());
+ // DCI.AddToWorklist(MinMax.getNode());
return MinMax;
}
+
+ // Support source modifiers as integer.
+ if (VT == MVT::i32 || VT == MVT::v2i32 || VT == MVT::i64) {
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ if (SDValue SrcMod = BitwiseToSrcModifierOp(LHS, DCI)) {
+ SDValue FRHS = DAG.getNode(ISD::BITCAST, SL, VT, RHS);
+ SDValue FSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, SrcMod, FRHS);
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, FSelect);
+ return BC;
+ }
+ }
}
// There's no reason to not do this if the condition has other uses.
diff --git a/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
index dd6cf9bc6c592..2db20e672c303 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-select-source-modifiers.ll
@@ -8,18 +8,15 @@ define i32 @fneg_select_i32(i32 %cond, i32 %a, i32 %b) {
; GCN-LABEL: fneg_select_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_select_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = xor i32 %a, u0x80000000
%cmp = icmp eq i32 %cond, zeroinitializer
@@ -31,24 +28,19 @@ define <2 x i32> @fneg_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b)
; GCN-LABEL: fneg_select_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v4, -v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_select_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, -v2, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
@@ -60,18 +52,15 @@ define i32 @fabs_select_i32(i32 %cond, i32 %a, i32 %b) {
; GCN-LABEL: fabs_select_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, |v1|, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fabs_select_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, |v1|, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = and i32 %a, u0x7fffffff
%cmp = icmp eq i32 %cond, zeroinitializer
@@ -83,24 +72,19 @@ define <2 x i32> @fabs_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b)
; GCN-LABEL: fabs_select_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GCN-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v4, |v2|, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, v5, |v3|, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fabs_select_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
-; GFX11-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, |v2|, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, |v3|, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.a = and <2 x i32> %a, splat (i32 u0x7fffffff)
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
>From 9b486bc549f00efe7157d6f729216a67838c835e Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Mon, 7 Jul 2025 10:32:27 -0500
Subject: [PATCH 4/6] Simplify switch in BitwiseToSrcModifierOp()
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index bec86877e1705..d0b1ecbe5ec13 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4872,20 +4872,21 @@ static SDValue BitwiseToSrcModifierOp(SDValue N,
case ISD::XOR:
if (Mask == 0x80000000u || Mask == 0x8000000000000000u)
return DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
- return SDValue();
+ break;
case ISD::OR:
if (Mask == 0x80000000u || Mask == 0x8000000000000000u) {
SDValue Abs = DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
return DAG.getNode(ISD::FABS, SDLoc(N), FVT, Abs);
}
- return SDValue();
+ break;
case ISD::AND:
if (Mask == 0x7fffffffu || Mask == 0x7fffffffffffffffu)
return DAG.getNode(ISD::FABS, SDLoc(N), FVT, BC);
- return SDValue();
+ break;
default:
return SDValue();
}
+ return SDValue();
}
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
>From ba276e39fa57527499c942fdb4ab6d85e0d06b26 Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Mon, 7 Jul 2025 10:46:53 -0500
Subject: [PATCH 5/6] [NFC] Correct typo in BitwiseToSrcModifierOp()
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d0b1ecbe5ec13..2ada636d8d2c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4875,8 +4875,8 @@ static SDValue BitwiseToSrcModifierOp(SDValue N,
break;
case ISD::OR:
if (Mask == 0x80000000u || Mask == 0x8000000000000000u) {
- SDValue Abs = DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
- return DAG.getNode(ISD::FABS, SDLoc(N), FVT, Abs);
+ SDValue Neg = DAG.getNode(ISD::FNEG, SDLoc(N), FVT, BC);
+ return DAG.getNode(ISD::FABS, SDLoc(N), FVT, Neg);
}
break;
case ISD::AND:
>From d5cf4e1d2d45609ea2a08500e1df644ad9836b6f Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Mon, 7 Jul 2025 11:11:31 -0500
Subject: [PATCH 6/6] Fix bitcast type in performSelectCombine()
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2ada636d8d2c7..fcd3628209312 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4931,13 +4931,15 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
}
// Support source modifiers as integer.
+ // (select c, (xor/or/and x, c), y) -> (bitcast (select c)))
if (VT == MVT::i32 || VT == MVT::v2i32 || VT == MVT::i64) {
SDLoc SL(N);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
if (SDValue SrcMod = BitwiseToSrcModifierOp(LHS, DCI)) {
- SDValue FRHS = DAG.getNode(ISD::BITCAST, SL, VT, RHS);
- SDValue FSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, SrcMod, FRHS);
+ EVT FVT = IntToFloatVT(VT);
+ SDValue FRHS = DAG.getNode(ISD::BITCAST, SL, FVT, RHS);
+ SDValue FSelect = DAG.getNode(ISD::SELECT, SL, FVT, Cond, SrcMod, FRHS);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, FSelect);
return BC;
}
More information about the llvm-commits
mailing list