[llvm] r347265 - AMDGPU: Fix V_FMA_F16 selection on GFX9
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 19 13:10:16 PST 2018
Author: kzhuravl
Date: Mon Nov 19 13:10:16 2018
New Revision: 347265
URL: http://llvm.org/viewvc/llvm-project?rev=347265&view=rev
Log:
AMDGPU: Fix V_FMA_F16 selection on GFX9
On GFX9, fma should select the opsel version of the instruction (V_FMA_F16_gfx9).
Differential Revision: https://reviews.llvm.org/D54545
Modified:
llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll
llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=347265&r1=347264&r2=347265&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Mon Nov 19 13:10:16 2018
@@ -438,13 +438,20 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_
let Predicates = [Has16BitInsts, isGFX9];
}
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+ let Predicates = [Has16BitInsts, isVIOnly];
+}
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+ let renamedInGFX9 = 1;
+ let Predicates = [Has16BitInsts, isGFX9];
+}
+
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
}
@@ -452,7 +459,6 @@ let SubtargetPredicate = isGFX9 in {
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll?rev=347265&r1=347264&r2=347265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll Mon Nov 19 13:10:16 2018
@@ -8,16 +8,16 @@
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
; are not converted from f16 to f32.
; GCN-LABEL: {{^}}dotproduct_f16
-; GFX900: v_fma_legacy_f16
-; GFX900: v_fma_legacy_f16
+; GFX900: v_fma_f16
+; GFX900: v_fma_f16
; GFX906: v_mul_f16_e32
; GFX906: v_mul_f16_e32
-; GFX906-UNSAFE: v_fma_legacy_f16
+; GFX906-UNSAFE: v_fma_f16
; GFX906-CONTRACT: v_mac_f16_e32
-; GFX906-DENORM-CONTRACT: v_fma_legacy_f16
+; GFX906-DENORM-CONTRACT: v_fma_f16
define amdgpu_kernel void @dotproduct_f16(<2 x half> addrspace(1)* %src1,
<2 x half> addrspace(1)* %src2,
half addrspace(1)* nocapture %dst) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll?rev=347265&r1=347264&r2=347265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll Mon Nov 19 13:10:16 2018
@@ -171,7 +171,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -185,7 +185,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -199,7 +199,7 @@ entry:
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
@@ -322,7 +322,7 @@ entry:
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
; GFX9: v_mul_f16
-; GFX9: v_fma_legacy_f16
+; GFX9: v_fma_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_sub_f32
; GCN: s_setpc_b64
@@ -363,7 +363,7 @@ entry:
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
+; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64
More information about the llvm-commits mailing list