[llvm] ae43420 - AMDGPU/GlobalISel: Fix not selecting modifiers for f16 fma on gfx9
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 17 18:51:52 PST 2022
Author: Matt Arsenault
Date: 2022-11-17T18:51:45-08:00
New Revision: ae43420f39f5fea798141bb76b0b68c3d5e9dede
URL: https://github.com/llvm/llvm-project/commit/ae43420f39f5fea798141bb76b0b68c3d5e9dede
DIFF: https://github.com/llvm/llvm-project/commit/ae43420f39f5fea798141bb76b0b68c3d5e9dede.diff
LOG: AMDGPU/GlobalISel: Fix not selecting modifiers for f16 fma on gfx9
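The VOP3OpSel operand selector (selectVOP3OpSelMods) wasn't trying to
match any source modifiers: it always used the root register and emitted
src_mods = 0. Just try to match the basic case via selectVOP3ModsImpl,
like the DAG does. op_sel is still not handled.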
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 02ef7f834e323..5c12f2a5175da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3628,10 +3628,14 @@ AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
+
// FIXME: Handle op_sel
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
index b5afef282f657..2e9a374caef07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
@@ -114,8 +114,7 @@ define half @test_half_sub_mul(half %x, half %y, half %z) {
; GFX9-CONTRACT-LABEL: test_half_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_sub_mul:
@@ -136,8 +135,7 @@ define half @test_half_sub_mul(half %x, half %y, half %z) {
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul:
@@ -164,8 +162,7 @@ define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
@@ -186,8 +183,7 @@ define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
index 0ba1a9b4002e7..e9d6ffabdedcb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
@@ -66,9 +66,7 @@ define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX9-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f16_sub_ext_neg_mul:
@@ -89,9 +87,7 @@ define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX10-CONTRACT-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 6c2215d951dd1..d60cd7a5981a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -122,6 +122,129 @@ define half @v_fma_f16(half %x, half %y, half %z) {
ret half %fma
}
+define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_lhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_lhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_lhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_lhs:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.x = fneg half %x
+ %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
+ ret half %fma
+}
+
+define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e64 v1, -v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_rhs:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.y = fneg half %y
+ %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
+ ret half %fma
+}
+
+define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_add:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e64 v2, -v2
+; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_add:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_add:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_add:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_add:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.z = fneg half %z
+ %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
+ ret half %fma
+}
+
define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
; GFX6-LABEL: v_fma_v2f16:
; GFX6: ; %bb.0:
More information about the llvm-commits mailing list