[llvm] ae43420 - AMDGPU/GlobalISel: Fix not selecting modifiers for f16 fma on gfx9

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 17 18:51:52 PST 2022


Author: Matt Arsenault
Date: 2022-11-17T18:51:45-08:00
New Revision: ae43420f39f5fea798141bb76b0b68c3d5e9dede

URL: https://github.com/llvm/llvm-project/commit/ae43420f39f5fea798141bb76b0b68c3d5e9dede
DIFF: https://github.com/llvm/llvm-project/commit/ae43420f39f5fea798141bb76b0b68c3d5e9dede.diff

LOG: AMDGPU/GlobalISel: Fix not selecting modifiers for f16 fma on gfx9

VOP3OpSel wasn't trying to match any modifiers. Just try to match the
basic case, like the DAG does.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 02ef7f834e323..5c12f2a5175da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3628,10 +3628,14 @@ AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
 
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
+
   // FIXME: Handle op_sel
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
   }};
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
index b5afef282f657..2e9a374caef07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
@@ -114,8 +114,7 @@ define half @test_half_sub_mul(half %x, half %y, half %z) {
 ; GFX9-CONTRACT-LABEL: test_half_sub_mul:
 ; GFX9-CONTRACT:       ; %bb.0: ; %.entry
 ; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, -v2
 ; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-DENORM-LABEL: test_half_sub_mul:
@@ -136,8 +135,7 @@ define half @test_half_sub_mul(half %x, half %y, half %z) {
 ; GFX10-CONTRACT:       ; %bb.0: ; %.entry
 ; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-CONTRACT-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, -v2
 ; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-DENORM-LABEL: test_half_sub_mul:
@@ -164,8 +162,7 @@ define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
 ; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
 ; GFX9-CONTRACT:       ; %bb.0: ; %.entry
 ; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, -v0, v1, v2
 ; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
@@ -186,8 +183,7 @@ define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
 ; GFX10-CONTRACT:       ; %bb.0: ; %.entry
 ; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-CONTRACT-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, -v0, v1, v2
 ; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
index 0ba1a9b4002e7..e9d6ffabdedcb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
@@ -66,9 +66,7 @@ define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
 ; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
 ; GFX9-CONTRACT:       ; %bb.0: ; %entry
 ; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX9-CONTRACT-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, -v1, -v2
 ; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-DENORM-LABEL: test_f16_sub_ext_neg_mul:
@@ -89,9 +87,7 @@ define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
 ; GFX10-CONTRACT:       ; %bb.0: ; %entry
 ; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-CONTRACT-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX10-CONTRACT-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, -v1, -v2
 ; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 6c2215d951dd1..d60cd7a5981a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -122,6 +122,129 @@ define half @v_fma_f16(half %x, half %y, half %z) {
   ret half %fma
 }
 
+define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_lhs:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_lhs:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_fma_f16 v0, -v0, v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_lhs:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_fma_f16 v0, -v0, v1, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_lhs:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_fma_f16 v0, -v0, v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_lhs:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_fma_f16 v0, -v0, v1, v2
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %neg.x = fneg half %x
+  %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
+  ret half %fma
+}
+
+define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_rhs:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT:    v_cvt_f32_f16_e64 v1, -v1
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_rhs:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_fma_f16 v0, v0, -v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_rhs:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_fma_f16 v0, v0, -v1, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_rhs:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_fma_f16 v0, v0, -v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_rhs:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_fma_f16 v0, v0, -v1, v2
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %neg.y = fneg half %y
+  %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
+  ret half %fma
+}
+
+define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
+; GFX6-LABEL: v_fma_f16_fneg_add:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT:    v_cvt_f32_f16_e64 v2, -v2
+; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_fma_f16_fneg_add:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_fma_f16 v0, v0, v1, -v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_fma_f16_fneg_add:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_fma_f16 v0, v0, v1, -v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_fma_f16_fneg_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_fma_f16 v0, v0, v1, -v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16_fneg_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_fma_f16 v0, v0, v1, -v2
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %neg.z = fneg half %z
+  %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
+  ret half %fma
+}
+
 define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
 ; GFX6-LABEL: v_fma_v2f16:
 ; GFX6:       ; %bb.0:


        


More information about the llvm-commits mailing list