[llvm] 1dfbc2e - [AMDGPU] Only enable mad/mac legacy f32 patterns if denormals may be flushed
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 9 09:18:00 PDT 2020
Author: Jay Foad
Date: 2020-10-09T17:08:38+01:00
New Revision: 1dfbc2ea144174f751ed3527e9b453e47a3a38e0
URL: https://github.com/llvm/llvm-project/commit/1dfbc2ea144174f751ed3527e9b453e47a3a38e0
DIFF: https://github.com/llvm/llvm-project/commit/1dfbc2ea144174f751ed3527e9b453e47a3a38e0.diff
LOG: [AMDGPU] Only enable mad/mac legacy f32 patterns if denormals may be flushed
Following on from D88890, this makes the newly added patterns
conditional on NoFP32Denormals. mad/mac f32 instructions always flush
denormals regardless of the MODE register setting, and I believe the
legacy variants do the same.
Differential Revision: https://reviews.llvm.org/D89123
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 068d8dc2a0fe..621c339ff105 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -881,10 +881,12 @@ let AddedComplexity = 9 in {
let OtherPredicates = [HasMadMacF32Insts] in {
def : FMADPat <f32, V_MAC_F32_e64, fmad>;
def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+} // OtherPredicates = [HasMadMacF32Insts]
// Don't allow source modifiers. If there are any source modifiers then it's
// better to select mad instead of mac.
-let SubtargetPredicate = isGFX6GFX7GFX10 in
+let SubtargetPredicate = isGFX6GFX7GFX10,
+ OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
def : GCNPat <
(f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
(VOP3NoMods f32:$src1)),
@@ -892,7 +894,6 @@ def : GCNPat <
(V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // OtherPredicates = [HasMadMacF32Insts]
let SubtargetPredicate = Has16BitInsts in {
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
@@ -909,9 +910,10 @@ class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-let OtherPredicates = [HasMadMacF32Insts] in {
+let OtherPredicates = [HasMadMacF32Insts] in
def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
+let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
def : GCNPat <
(f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
(VOP3Mods f32:$src1, i32:$src1_mod)),
@@ -919,7 +921,6 @@ def : GCNPat <
(V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // OtherPredicates = [HasMadMacF32Insts]
let SubtargetPredicate = Has16BitInsts in
def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index ebe3ffd06ced..9b6370d21586 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -237,7 +237,52 @@ define float @v_mul_legacy_fneg_f32(float %a, float %b) {
ret float %result
}
-define float @v_mad_legacy_f32(float %a, float %b, float %c) {
+; Don't form mad/mac instructions because they don't support denormals.
+define float @v_add_mul_legacy_f32(float %a, float %b, float %c) {
+; GFX6-LABEL: v_add_mul_legacy_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_mul_legacy_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_mul_legacy_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_add_mul_legacy_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_add_mul_legacy_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT: s_setpc_b64 s[30:31]
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+ %add = fadd float %mul, %c
+ ret float %add
+}
+
+define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
; GFX6-LABEL: v_mad_legacy_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -279,7 +324,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) {
ret float %add
}
-define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) {
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) #2 {
; GFX6-LABEL: v_mad_legacy_fneg_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -512,3 +557,4 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index a91745b636d9..53809d32b087 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -38,6 +38,17 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
ret void
}
+; Don't form mad/mac instructions because they don't support denormals.
+; GCN-LABEL: {{^}}test_add_mul_legacy_f32:
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+ %add = fadd float %mul, %c
+ store float %add, float addrspace(1)* %out, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}test_mad_legacy_f32:
; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
@@ -45,7 +56,7 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
store float %add, float addrspace(1)* %out, align 4
@@ -56,7 +67,7 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
@@ -70,3 +81,4 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }
More information about the llvm-commits mailing list